diff --git a/.asf.yaml b/.asf.yaml
index 22042b355b2fa..3935a525ff3c4 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -31,6 +31,8 @@ github:
merge: false
squash: true
rebase: true
+ ghp_branch: master
+ ghp_path: /docs
notifications:
pullrequests: reviews@spark.apache.org
diff --git a/.github/labeler.yml b/.github/labeler.yml
index a74b4ab5d5a33..6617acbf9187e 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -26,16 +26,14 @@ INFRA:
'.asf.yaml',
'.gitattributes',
'.gitignore',
- 'dev/merge_spark_pr.py',
- 'dev/run-tests-jenkins*'
+ 'dev/merge_spark_pr.py'
]
BUILD:
- changed-files:
- all-globs-to-any-file: [
'dev/**/*',
- '!dev/merge_spark_pr.py',
- '!dev/run-tests-jenkins*'
+ '!dev/merge_spark_pr.py'
]
- any-glob-to-any-file: [
'build/**/*',
@@ -199,6 +197,7 @@ YARN:
KUBERNETES:
- changed-files:
- any-glob-to-any-file: [
+ 'bin/docker-image-tool.sh',
'resource-managers/kubernetes/**/*'
]
@@ -225,7 +224,7 @@ DEPLOY:
CONNECT:
- changed-files:
- any-glob-to-any-file: [
- 'connect/**/*',
+ 'sql/connect/**/*',
'connector/connect/**/*',
'python/pyspark/sql/**/connect/**/*',
'python/pyspark/ml/**/connect/**/*'
diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml
index 576f64f3a0869..3117872e21680 100644
--- a/.github/workflows/build_and_test.yml
+++ b/.github/workflows/build_and_test.yml
@@ -58,6 +58,12 @@ jobs:
outputs:
required: ${{ steps.set-outputs.outputs.required }}
image_url: ${{ steps.infra-image-outputs.outputs.image_url }}
+ image_docs_url: ${{ steps.infra-image-docs-outputs.outputs.image_docs_url }}
+ image_docs_url_link: ${{ steps.infra-image-link.outputs.image_docs_url_link }}
+ image_lint_url: ${{ steps.infra-image-lint-outputs.outputs.image_lint_url }}
+ image_lint_url_link: ${{ steps.infra-image-link.outputs.image_lint_url_link }}
+ image_sparkr_url: ${{ steps.infra-image-sparkr-outputs.outputs.image_sparkr_url }}
+ image_sparkr_url_link: ${{ steps.infra-image-link.outputs.image_sparkr_url_link }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
@@ -134,6 +140,44 @@ jobs:
IMG_NAME="apache-spark-ci-image:${{ inputs.branch }}-${{ github.run_id }}"
IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME"
echo "image_url=$IMG_URL" >> $GITHUB_OUTPUT
+ - name: Generate infra image URL (Documentation)
+ id: infra-image-docs-outputs
+ run: |
+ # Convert to lowercase to meet Docker repo name requirement
+ REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+ IMG_NAME="apache-spark-ci-image-docs:${{ inputs.branch }}-${{ github.run_id }}"
+ IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME"
+ echo "image_docs_url=$IMG_URL" >> $GITHUB_OUTPUT
+ - name: Generate infra image URL (Linter)
+ id: infra-image-lint-outputs
+ run: |
+ # Convert to lowercase to meet Docker repo name requirement
+ REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+ IMG_NAME="apache-spark-ci-image-lint:${{ inputs.branch }}-${{ github.run_id }}"
+ IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME"
+ echo "image_lint_url=$IMG_URL" >> $GITHUB_OUTPUT
+ - name: Generate infra image URL (SparkR)
+ id: infra-image-sparkr-outputs
+ run: |
+ # Convert to lowercase to meet Docker repo name requirement
+ REPO_OWNER=$(echo "${{ github.repository_owner }}" | tr '[:upper:]' '[:lower:]')
+ IMG_NAME="apache-spark-ci-image-sparkr:${{ inputs.branch }}-${{ github.run_id }}"
+ IMG_URL="ghcr.io/$REPO_OWNER/$IMG_NAME"
+ echo "image_sparkr_url=$IMG_URL" >> $GITHUB_OUTPUT
+ - name: Link the docker images
+ id: infra-image-link
+ run: |
+ # Set the image URL for job "docs"
+ # Should delete the link and directly use image_docs_url after SPARK 3.x EOL
+ if [[ "${{ inputs.branch }}" == 'branch-3.5' ]]; then
+ echo "image_docs_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT
+ echo "image_lint_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT
+ echo "image_sparkr_url_link=${{ steps.infra-image-outputs.outputs.image_url }}" >> $GITHUB_OUTPUT
+ else
+ echo "image_docs_url_link=${{ steps.infra-image-docs-outputs.outputs.image_docs_url }}" >> $GITHUB_OUTPUT
+ echo "image_lint_url_link=${{ steps.infra-image-lint-outputs.outputs.image_lint_url }}" >> $GITHUB_OUTPUT
+ echo "image_sparkr_url_link=${{ steps.infra-image-sparkr-outputs.outputs.image_sparkr_url }}" >> $GITHUB_OUTPUT
+ fi
# Build: build Spark and run the tests for specified modules.
build:
@@ -264,20 +308,20 @@ jobs:
with:
distribution: zulu
java-version: ${{ matrix.java }}
- - name: Install Python 3.9
+ - name: Install Python 3.11
uses: actions/setup-python@v5
# We should install one Python that is higher than 3+ for SQL and Yarn because:
# - SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
# - Yarn has a Python specific test too, for example, YarnClusterSuite.
if: contains(matrix.modules, 'yarn') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
with:
- python-version: '3.9'
+ python-version: '3.11'
architecture: x64
- - name: Install Python packages (Python 3.9)
+ - name: Install Python packages (Python 3.11)
if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect')
run: |
- python3.9 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
- python3.9 -m pip list
+ python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3'
+ python3.11 -m pip list
# Run the tests.
- name: Run tests
env: ${{ fromJSON(inputs.envs) }}
@@ -304,7 +348,7 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: unit-tests-log-${{ matrix.modules }}-${{ matrix.comment }}-${{ matrix.java }}-${{ matrix.hadoop }}-${{ matrix.hive }}
- path: "**/target/unit-tests.log"
+ path: "**/target/*.log"
infra-image:
name: "Base image build"
@@ -343,7 +387,7 @@ jobs:
uses: docker/setup-buildx-action@v3
- name: Build and push
id: docker_build
- uses: docker/build-push-action@v5
+ uses: docker/build-push-action@v6
with:
context: ./dev/infra/
push: true
@@ -351,6 +395,40 @@ jobs:
${{ needs.precondition.outputs.image_url }}
# Use the infra image cache to speed up
cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:${{ inputs.branch }}
+ - name: Build and push (Documentation)
+ if: hashFiles('dev/spark-test-image/docs/Dockerfile') != ''
+ id: docker_build_docs
+ uses: docker/build-push-action@v6
+ with:
+ context: ./dev/spark-test-image/docs/
+ push: true
+ tags: |
+ ${{ needs.precondition.outputs.image_docs_url }}
+ # Use the infra image cache to speed up
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-docs-cache:${{ inputs.branch }}
+ - name: Build and push (Linter)
+ if: hashFiles('dev/spark-test-image/lint/Dockerfile') != ''
+ id: docker_build_lint
+ uses: docker/build-push-action@v6
+ with:
+ context: ./dev/spark-test-image/lint/
+ push: true
+ tags: |
+ ${{ needs.precondition.outputs.image_lint_url }}
+ # Use the infra image cache to speed up
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-lint-cache:${{ inputs.branch }}
+ - name: Build and push (SparkR)
+ if: hashFiles('dev/spark-test-image/sparkr/Dockerfile') != ''
+ id: docker_build_sparkr
+ uses: docker/build-push-action@v6
+ with:
+ context: ./dev/spark-test-image/sparkr/
+ push: true
+ tags: |
+ ${{ needs.precondition.outputs.image_sparkr_url }}
+ # Use the infra image cache to speed up
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ inputs.branch }}
+
pyspark:
needs: [precondition, infra-image]
@@ -509,7 +587,7 @@ jobs:
runs-on: ubuntu-latest
timeout-minutes: 180
container:
- image: ${{ needs.precondition.outputs.image_url }}
+ image: ${{ needs.precondition.outputs.image_sparkr_url_link }}
env:
HADOOP_PROFILE: ${{ inputs.hadoop }}
HIVE_PROFILE: hive2.3
@@ -606,18 +684,22 @@ jobs:
- name: Breaking change detection against branch-3.5
uses: bufbuild/buf-breaking-action@v1
with:
- input: connect/common/src/main
+ input: sql/connect/common/src/main
against: 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
- - name: Install Python 3.9
+ - name: Install Python 3.11
uses: actions/setup-python@v5
with:
- python-version: '3.9'
+ python-version: '3.11'
- name: Install dependencies for Python CodeGen check
run: |
- python3.9 -m pip install 'black==23.9.1' 'protobuf==4.25.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
- python3.9 -m pip list
- - name: Python CodeGen check
+ python3.11 -m pip install 'black==23.9.1' 'protobuf==5.28.3' 'mypy==1.8.0' 'mypy-protobuf==3.3.0'
+ python3.11 -m pip list
+ - name: Python CodeGen check for branch-3.5
+ if: inputs.branch == 'branch-3.5'
run: ./dev/connect-check-protos.py
+ - name: Python CodeGen check
+ if: inputs.branch != 'branch-3.5'
+ run: ./dev/check-protos.py
# Static analysis
lint:
@@ -635,7 +717,7 @@ jobs:
PYSPARK_PYTHON: python3.9
GITHUB_PREV_SHA: ${{ github.event.before }}
container:
- image: ${{ needs.precondition.outputs.image_url }}
+ image: ${{ needs.precondition.outputs.image_lint_url_link }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
@@ -702,13 +784,6 @@ jobs:
run: ./dev/lint-java
- name: Spark connect jvm client mima check
run: ./dev/connect-jvm-client-mima-check
- - name: Install Python linter dependencies for branch-3.4
- if: inputs.branch == 'branch-3.4'
- run: |
- # SPARK-44554: Copy from https://github.com/apache/spark/blob/a05c27e85829fe742c1828507a1fd180cdc84b54/.github/workflows/build_and_test.yml#L571-L578
- # Should delete this section after SPARK 3.4 EOL.
- python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
- python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- name: Install Python linter dependencies for branch-3.5
if: inputs.branch == 'branch-3.5'
run: |
@@ -716,18 +791,8 @@ jobs:
# Should delete this section after SPARK 3.5 EOL.
python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0'
python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.56.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0'
- - name: Install Python dependencies for python linter and documentation generation
- if: inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5'
- run: |
- # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
- # See 'ipython_genutils' in SPARK-38517
- # See 'docutils<0.18.0' in SPARK-39421
- python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
- ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
- 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
- 'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
- 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
- python3.9 -m pip list
+ - name: List Python packages
+ run: python3.9 -m pip list
- name: Python linter
run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python
# Should delete this section after SPARK 3.5 EOL.
@@ -745,16 +810,16 @@ jobs:
if: inputs.branch == 'branch-3.5'
run: if test -f ./dev/connect-check-protos.py; then PATH=$PATH:$HOME/buf/bin PYTHON_EXECUTABLE=python3.9 ./dev/connect-check-protos.py; fi
# Should delete this section after SPARK 3.5 EOL.
- - name: Install JavaScript linter dependencies for branch-3.4, branch-3.5
- if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5'
+ - name: Install JavaScript linter dependencies for branch-3.5
+ if: inputs.branch == 'branch-3.5'
run: |
apt update
apt-get install -y nodejs npm
- name: JS linter
run: ./dev/lint-js
# Should delete this section after SPARK 3.5 EOL.
- - name: Install R linter dependencies for branch-3.4, branch-3.5
- if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5'
+ - name: Install R linter dependencies for branch-3.5
+ if: inputs.branch == 'branch-3.5'
run: |
apt update
apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev \
@@ -783,7 +848,7 @@ jobs:
PYSPARK_PYTHON: python3.9
GITHUB_PREV_SHA: ${{ github.event.before }}
container:
- image: ${{ needs.precondition.outputs.image_url }}
+ image: ${{ needs.precondition.outputs.image_docs_url_link }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
@@ -833,20 +898,8 @@ jobs:
with:
distribution: zulu
java-version: ${{ inputs.java }}
- - name: Install Python dependencies for python linter and documentation generation
- if: inputs.branch != 'branch-3.4' && inputs.branch != 'branch-3.5'
- run: |
- # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5
- # See 'ipython_genutils' in SPARK-38517
- # See 'docutils<0.18.0' in SPARK-39421
- python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
- ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8' 'docutils<0.18.0' \
- 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
- 'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
- 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
- python3.9 -m pip list
- - name: Install dependencies for documentation generation for branch-3.4, branch-3.5
- if: inputs.branch == 'branch-3.4' || inputs.branch == 'branch-3.5'
+ - name: Install dependencies for documentation generation for branch-3.5
+ if: inputs.branch == 'branch-3.5'
run: |
# pandoc is required to generate PySpark APIs as well in nbsphinx.
apt-get update -y
@@ -860,6 +913,8 @@ jobs:
python3.9 -m pip install ipython_genutils # See SPARK-38517
python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly>=4.8'
python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421
+ - name: List Python packages
+ run: python3.9 -m pip list
- name: Install dependencies for documentation generation
run: |
# Keep the version of Bundler here in sync with the following locations:
@@ -867,20 +922,26 @@ jobs:
# - docs/README.md
gem install bundler -v 2.4.22
cd docs
- bundle install
+ bundle install --retry=100
- name: Run documentation build
run: |
- # We need this link because the jekyll build calls `python`.
- ln -s "$(which python3.9)" "/usr/local/bin/python"
+ # We need this link to make sure `python3` points to `python3.9` which contains the prerequisite packages.
+ ln -s "$(which python3.9)" "/usr/local/bin/python3"
# Build docs first with SKIP_API to ensure they are buildable without requiring any
# language docs to be built beforehand.
- cd docs; SKIP_API=1 bundle exec jekyll build; cd ..
+ cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd ..
if [ -f "./dev/is-changed.py" ]; then
# Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs
pyspark_modules=`cd dev && python3.9 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"`
if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi
if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi
fi
+ # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC`
+ echo "SKIP_ERRORDOC: $SKIP_ERRORDOC"
+ echo "SKIP_SCALADOC: $SKIP_SCALADOC"
+ echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC"
+ echo "SKIP_RDOC: $SKIP_RDOC"
+ echo "SKIP_SQLDOC: $SKIP_SQLDOC"
cd docs
bundle exec jekyll build
- name: Tar documentation
@@ -1106,14 +1167,16 @@ jobs:
with:
distribution: zulu
java-version: ${{ inputs.java }}
- - name: start minikube
+ - name: Install R
run: |
- # See more in "Installation" https://minikube.sigs.k8s.io/docs/start/
- curl -LO https://storage.googleapis.com/minikube/releases/latest/minikube-linux-amd64
- sudo install minikube-linux-amd64 /usr/local/bin/minikube
- rm minikube-linux-amd64
+ sudo apt update
+ sudo apt-get install -y r-base
+ - name: Start Minikube
+ uses: medyagh/setup-minikube@v0.0.18
+ with:
# Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic
- minikube start --cpus 2 --memory 6144
+ cpus: 2
+ memory: 6144m
- name: Print K8S pods and nodes info
run: |
kubectl get pods -A
@@ -1126,11 +1189,10 @@ jobs:
export PVC_TESTS_VM_PATH=$PVC_TMP_DIR
minikube mount ${PVC_TESTS_HOST_PATH}:${PVC_TESTS_VM_PATH} --gid=0 --uid=185 &
kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true
- kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.2/installer/volcano-development.yaml || true
- if [[ "${{ inputs.branch }}" == 'branch-3.5' || "${{ inputs.branch }}" == 'branch-3.4' ]]; then
+ if [[ "${{ inputs.branch }}" == 'branch-3.5' ]]; then
kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml || true
else
- kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.8.2/installer/volcano-development.yaml || true
+ kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.9.0/installer/volcano-development.yaml || true
fi
eval $(minikube docker-env)
build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test"
diff --git a/.github/workflows/build_infra_images_cache.yml b/.github/workflows/build_infra_images_cache.yml
index 49b2e2e80d9ac..a6beacedeebd4 100644
--- a/.github/workflows/build_infra_images_cache.yml
+++ b/.github/workflows/build_infra_images_cache.yml
@@ -27,6 +27,9 @@ on:
- 'branch-*'
paths:
- 'dev/infra/Dockerfile'
+ - 'dev/spark-test-image/docs/Dockerfile'
+ - 'dev/spark-test-image/lint/Dockerfile'
+ - 'dev/spark-test-image/sparkr/Dockerfile'
- '.github/workflows/build_infra_images_cache.yml'
# Create infra image when cutting down branches/tags
create:
@@ -51,7 +54,7 @@ jobs:
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
id: docker_build
- uses: docker/build-push-action@v5
+ uses: docker/build-push-action@v6
with:
context: ./dev/infra/
push: true
@@ -60,3 +63,42 @@ jobs:
cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-cache:${{ github.ref_name }},mode=max
- name: Image digest
run: echo ${{ steps.docker_build.outputs.digest }}
+ - name: Build and push (Documentation)
+ if: hashFiles('dev/spark-test-image/docs/Dockerfile') != ''
+ id: docker_build_docs
+ uses: docker/build-push-action@v6
+ with:
+ context: ./dev/spark-test-image/docs/
+ push: true
+ tags: ghcr.io/apache/spark/apache-spark-github-action-image-docs-cache:${{ github.ref_name }}-static
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-docs-cache:${{ github.ref_name }}
+ cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-docs-cache:${{ github.ref_name }},mode=max
+ - name: Image digest (Documentation)
+ if: hashFiles('dev/spark-test-image/docs/Dockerfile') != ''
+ run: echo ${{ steps.docker_build_docs.outputs.digest }}
+ - name: Build and push (Linter)
+ if: hashFiles('dev/spark-test-image/lint/Dockerfile') != ''
+ id: docker_build_lint
+ uses: docker/build-push-action@v6
+ with:
+ context: ./dev/spark-test-image/lint/
+ push: true
+ tags: ghcr.io/apache/spark/apache-spark-github-action-image-lint-cache:${{ github.ref_name }}-static
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-lint-cache:${{ github.ref_name }}
+ cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-lint-cache:${{ github.ref_name }},mode=max
+ - name: Image digest (Linter)
+ if: hashFiles('dev/spark-test-image/lint/Dockerfile') != ''
+ run: echo ${{ steps.docker_build_lint.outputs.digest }}
+ - name: Build and push (SparkR)
+ if: hashFiles('dev/spark-test-image/sparkr/Dockerfile') != ''
+ id: docker_build_sparkr
+ uses: docker/build-push-action@v6
+ with:
+ context: ./dev/spark-test-image/sparkr/
+ push: true
+ tags: ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ github.ref_name }}-static
+ cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ github.ref_name }}
+ cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-sparkr-cache:${{ github.ref_name }},mode=max
+ - name: Image digest (SparkR)
+ if: hashFiles('dev/spark-test-image/sparkr/Dockerfile') != ''
+ run: echo ${{ steps.docker_build_sparkr.outputs.digest }}
diff --git a/.github/workflows/build_maven.yml b/.github/workflows/build_maven.yml
index c3a23c02f6a61..b5546c61eb11b 100644
--- a/.github/workflows/build_maven.yml
+++ b/.github/workflows/build_maven.yml
@@ -30,8 +30,3 @@ jobs:
name: Run
uses: ./.github/workflows/maven_test.yml
if: github.repository == 'apache/spark'
- with:
- envs: >-
- {
- "SKIP_SPARK_RELEASE_VERSIONS": "3.4.2"
- }
diff --git a/.github/workflows/build_maven_java21_macos14.yml b/.github/workflows/build_maven_java21_macos15.yml
similarity index 92%
rename from .github/workflows/build_maven_java21_macos14.yml
rename to .github/workflows/build_maven_java21_macos15.yml
index fb5e609f4eae0..cc6d0ea4e90da 100644
--- a/.github/workflows/build_maven_java21_macos14.yml
+++ b/.github/workflows/build_maven_java21_macos15.yml
@@ -17,7 +17,7 @@
# under the License.
#
-name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, macos-14)"
+name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, MacOS-15)"
on:
schedule:
@@ -32,7 +32,7 @@ jobs:
if: github.repository == 'apache/spark'
with:
java: 21
- os: macos-14
+ os: macos-15
envs: >-
{
"OBJC_DISABLE_INITIALIZE_FORK_SAFETY": "YES"
diff --git a/.github/workflows/build_python_3.11_macos.yml b/.github/workflows/build_python_3.11_macos.yml
new file mode 100644
index 0000000000000..4caae55b5fea8
--- /dev/null
+++ b/.github/workflows/build_python_3.11_macos.yml
@@ -0,0 +1,32 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: "Build / Python-only (master, Python 3.11, MacOS)"
+
+on:
+ schedule:
+ - cron: '0 21 * * *'
+
+jobs:
+ run-build:
+ permissions:
+ packages: write
+ name: Run
+ uses: ./.github/workflows/python_macos_test.yml
+ if: github.repository == 'apache/spark'
diff --git a/.github/workflows/build_branch34.yml b/.github/workflows/build_python_3.13.yml
similarity index 70%
rename from .github/workflows/build_branch34.yml
rename to .github/workflows/build_python_3.13.yml
index deb6c42407970..6f67cf383584f 100644
--- a/.github/workflows/build_branch34.yml
+++ b/.github/workflows/build_python_3.13.yml
@@ -17,11 +17,11 @@
# under the License.
#
-name: "Build (branch-3.4, Scala 2.13, Hadoop 3, JDK 8)"
+name: "Build / Python-only (master, Python 3.13)"
on:
schedule:
- - cron: '0 9 * * *'
+ - cron: '0 20 * * *'
jobs:
run-build:
@@ -31,21 +31,15 @@ jobs:
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- java: 8
- branch: branch-3.4
+ java: 17
+ branch: master
hadoop: hadoop3
envs: >-
{
- "SCALA_PROFILE": "scala2.13",
- "PYTHON_TO_TEST": "",
- "ORACLE_DOCKER_IMAGE_NAME": "gvenzl/oracle-xe:21.3.0"
+ "PYTHON_TO_TEST": "python3.13"
}
jobs: >-
{
- "build": "true",
- "sparkr": "true",
- "tpcds-1g": "true",
- "docker-integration-tests": "true",
- "k8s-integration-tests": "true",
- "lint" : "true"
+ "pyspark": "true",
+ "pyspark-pandas": "true"
}
diff --git a/.github/workflows/build_branch34_python.yml b/.github/workflows/build_python_3.9.yml
similarity index 88%
rename from .github/workflows/build_branch34_python.yml
rename to .github/workflows/build_python_3.9.yml
index c109ba2dc7922..b2401fcf2aa14 100644
--- a/.github/workflows/build_branch34_python.yml
+++ b/.github/workflows/build_python_3.9.yml
@@ -17,11 +17,11 @@
# under the License.
#
-name: "Build / Python-only (branch-3.4)"
+name: "Build / Python-only (master, Python 3.9)"
on:
schedule:
- - cron: '0 9 * * *'
+ - cron: '0 21 * * *'
jobs:
run-build:
@@ -31,12 +31,12 @@ jobs:
uses: ./.github/workflows/build_and_test.yml
if: github.repository == 'apache/spark'
with:
- java: 8
- branch: branch-3.4
+ java: 17
+ branch: master
hadoop: hadoop3
envs: >-
{
- "PYTHON_TO_TEST": ""
+ "PYTHON_TO_TEST": "python3.9"
}
jobs: >-
{
diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml
index 8458cdf771b19..471ad31279da4 100644
--- a/.github/workflows/build_python_connect.yml
+++ b/.github/workflows/build_python_connect.yml
@@ -71,7 +71,7 @@ jobs:
python packaging/connect/setup.py sdist
cd dist
pip install pyspark*connect-*.tar.gz
- pip install 'six==1.16.0' 'pandas<=2.2.2' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting
+ pip install 'six==1.16.0' 'pandas<=2.2.2' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' torch torchvision torcheval deepspeed unittest-xml-reporting 'plotly>=4.8'
- name: Run tests
env:
SPARK_TESTING: 1
@@ -84,7 +84,7 @@ jobs:
# Start a Spark Connect server for local
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
- --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+ --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
# Remove Py4J and PySpark zipped library to make sure there is no JVM connection
mv python/lib lib.back
@@ -93,7 +93,7 @@ jobs:
# Several tests related to catalog requires to run them sequencially, e.g., writing a table in a listener.
./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect,pyspark-ml-connect
# None of tests are dependent on each other in Pandas API on Spark so run them in parallel
- ./python/run-tests --parallelism=4 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3
+ ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3
# Stop Spark Connect server.
./sbin/stop-connect-server.sh
@@ -104,7 +104,7 @@ jobs:
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--master "local-cluster[2, 4, 1024]" \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
- --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+ --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
# Remove Py4J and PySpark zipped library to make sure there is no JVM connection
mv python/lib lib.back
diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml
index b00fdddb4b0e0..ad250d95fb844 100644
--- a/.github/workflows/build_python_connect35.yml
+++ b/.github/workflows/build_python_connect35.yml
@@ -70,7 +70,7 @@ jobs:
pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting plotly>=4.8 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*'
# Add Python deps for Spark Connect.
- pip install 'grpcio>=1.48,<1.57' 'grpcio-status>=1.48,<1.57' 'protobuf==3.20.3' 'googleapis-common-protos==1.56.4' 'graphviz==0.20.3'
+ pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3'
# Add torch as a testing dependency for TorchDistributor
pip install 'torch==2.0.1' 'torchvision==0.15.2' torcheval
@@ -87,7 +87,7 @@ jobs:
# Start a Spark Connect server for local
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
- --jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
+ --jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
# Checkout to branch-3.5 to use the tests in branch-3.5.
cd ..
@@ -98,7 +98,7 @@ jobs:
# Run branch-3.5 tests
./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect
# None of tests are dependent on each other in Pandas API on Spark so run them in parallel
- ./python/run-tests --parallelism=4 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect
+ ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v4
diff --git a/.github/workflows/build_python_pypy3.9.yml b/.github/workflows/build_python_pypy3.10.yml
similarity index 96%
rename from .github/workflows/build_python_pypy3.9.yml
rename to .github/workflows/build_python_pypy3.10.yml
index e05071ef034a0..163af2f4aec8b 100644
--- a/.github/workflows/build_python_pypy3.9.yml
+++ b/.github/workflows/build_python_pypy3.10.yml
@@ -17,7 +17,7 @@
# under the License.
#
-name: "Build / Python-only (master, PyPy 3.9)"
+name: "Build / Python-only (master, PyPy 3.10)"
on:
schedule:
diff --git a/.github/workflows/build_sparkr_window.yml b/.github/workflows/build_sparkr_window.yml
index cf879d9ebd306..b97251a461715 100644
--- a/.github/workflows/build_sparkr_window.yml
+++ b/.github/workflows/build_sparkr_window.yml
@@ -16,7 +16,7 @@
# specific language governing permissions and limitations
# under the License.
#
-name: "Build / SparkR-only (master, 4.4.1, windows-2022)"
+name: "Build / SparkR-only (master, 4.4.2, windows-2022)"
on:
schedule:
@@ -50,10 +50,10 @@ jobs:
with:
distribution: zulu
java-version: 17
- - name: Install R 4.4.1
+ - name: Install R 4.4.2
uses: r-lib/actions/setup-r@v2
with:
- r-version: 4.4.1
+ r-version: 4.4.2
- name: Install R dependencies
run: |
Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'e1071', 'survival', 'arrow', 'xml2'), repos='https://cloud.r-project.org/')"
@@ -85,6 +85,7 @@ jobs:
shell: cmd
env:
NOT_CRAN: true
+ SPARKR_SUPPRESS_DEPRECATION_WARNING: 1
# See SPARK-27848. Currently installing some dependent packages causes
# "(converted from warning) unable to identify current timezone 'C':" for an unknown reason.
# This environment variable works around to test SparkR against a higher version.
diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml
index fa30bd3abc8a5..6965fb4968af3 100644
--- a/.github/workflows/maven_test.yml
+++ b/.github/workflows/maven_test.yml
@@ -40,7 +40,7 @@ on:
description: OS to run this build.
required: false
type: string
- default: ubuntu-22.04
+ default: ubuntu-latest
envs:
description: Additional environment variables to set when running the tests. Should be in JSON format.
required: false
@@ -178,7 +178,7 @@ jobs:
- name: Install Python packages (Python 3.11)
if: (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect')
run: |
- python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'protobuf==4.25.1'
+ python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3'
python3.11 -m pip list
# Run the tests.
- name: Run tests
@@ -194,7 +194,7 @@ jobs:
if [[ "$INCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
- ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connect/common,connect/server test -fae
+ ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,sql/connect/common,sql/connect/server test -fae
elif [[ "$EXCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
new file mode 100644
index 0000000000000..8729012c2b8d2
--- /dev/null
+++ b/.github/workflows/pages.yml
@@ -0,0 +1,98 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: GitHub Pages deployment
+
+on:
+ push:
+ branches:
+ - master
+
+concurrency:
+ group: 'docs preview'
+ cancel-in-progress: false
+
+jobs:
+ docs:
+ name: Build and deploy documentation
+ runs-on: ubuntu-latest
+ permissions:
+ id-token: write
+ pages: write
+ environment:
+ name: github-pages # https://github.com/actions/deploy-pages/issues/271
+ env:
+ SPARK_TESTING: 1 # Reduce some noise in the logs
+ RELEASE_VERSION: 'In-Progress'
+ if: github.repository == 'apache/spark'
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v4
+ with:
+ repository: apache/spark
+ ref: 'master'
+ - name: Install Java 17
+ uses: actions/setup-java@v4
+ with:
+ distribution: zulu
+ java-version: 17
+ - name: Install Python 3.9
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.9'
+ architecture: x64
+ cache: 'pip'
+ - name: Install Python dependencies
+ run: |
+ pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \
+ ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.2.3' 'plotly>=4.8' 'docutils<0.18.0' \
+ 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.9.1' \
+ 'pandas-stubs==1.2.0.53' 'grpcio==1.62.0' 'grpcio-status==1.62.0' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \
+ 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5'
+ - name: Install Ruby for documentation generation
+ uses: ruby/setup-ruby@v1
+ with:
+ ruby-version: '3.3'
+ bundler-cache: true
+ - name: Install Pandoc
+ run: |
+ sudo apt-get update -y
+ sudo apt-get install pandoc
+ - name: Install dependencies for documentation generation
+ run: |
+ cd docs
+ gem install bundler -v 2.4.22 -n /usr/local/bin
+ bundle install --retry=100
+ - name: Run documentation build
+ run: |
+ sed -i".tmp1" 's/SPARK_VERSION:.*$/SPARK_VERSION: '"$RELEASE_VERSION"'/g' docs/_config.yml
+ sed -i".tmp2" 's/SPARK_VERSION_SHORT:.*$/SPARK_VERSION_SHORT: '"$RELEASE_VERSION"'/g' docs/_config.yml
+ sed -i".tmp3" "s/'facetFilters':.*$/'facetFilters': [\"version:$RELEASE_VERSION\"]/g" docs/_config.yml
+ sed -i".tmp4" 's/__version__: str = .*$/__version__: str = "'"$RELEASE_VERSION"'"/' python/pyspark/version.py
+ cd docs
+ SKIP_RDOC=1 bundle exec jekyll build
+ - name: Setup Pages
+ uses: actions/configure-pages@v5
+ - name: Upload artifact
+ uses: actions/upload-pages-artifact@v3
+ with:
+ path: 'docs/_site'
+ - name: Deploy to GitHub Pages
+ id: deployment
+ uses: actions/deploy-pages@v4
diff --git a/.github/workflows/publish_snapshot.yml b/.github/workflows/publish_snapshot.yml
index 1b5bd0ba61288..a5854d96a4d1a 100644
--- a/.github/workflows/publish_snapshot.yml
+++ b/.github/workflows/publish_snapshot.yml
@@ -28,7 +28,7 @@ on:
description: 'list of branches to publish (JSON)'
required: true
# keep in sync with default value of strategy matrix 'branch'
- default: '["master", "branch-3.5", "branch-3.4"]'
+ default: '["master", "branch-3.5"]'
jobs:
publish-snapshot:
@@ -38,7 +38,7 @@ jobs:
fail-fast: false
matrix:
# keep in sync with default value of workflow_dispatch input 'branch'
- branch: ${{ fromJSON( inputs.branch || '["master", "branch-3.5", "branch-3.4"]' ) }}
+ branch: ${{ fromJSON( inputs.branch || '["master", "branch-3.5"]' ) }}
steps:
- name: Checkout Spark repository
uses: actions/checkout@v4
@@ -52,13 +52,13 @@ jobs:
restore-keys: |
snapshot-maven-
- name: Install Java 8 for branch-3.x
- if: matrix.branch == 'branch-3.5' || matrix.branch == 'branch-3.4'
+ if: matrix.branch == 'branch-3.5'
uses: actions/setup-java@v4
with:
distribution: temurin
java-version: 8
- name: Install Java 17
- if: matrix.branch != 'branch-3.5' && matrix.branch != 'branch-3.4'
+ if: matrix.branch != 'branch-3.5'
uses: actions/setup-java@v4
with:
distribution: temurin
diff --git a/.github/workflows/python_macos_test.yml b/.github/workflows/python_macos_test.yml
new file mode 100644
index 0000000000000..cca133dab541a
--- /dev/null
+++ b/.github/workflows/python_macos_test.yml
@@ -0,0 +1,162 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+
+name: Build and test PySpark on macOS
+
+on:
+ workflow_call:
+ inputs:
+ java:
+ required: false
+ type: string
+ default: 17
+ python:
+ required: false
+ type: string
+ default: 3.11
+ branch:
+ description: Branch to run the build against
+ required: false
+ type: string
+ default: master
+ hadoop:
+ description: Hadoop version to run with. HADOOP_PROFILE environment variable should accept it.
+ required: false
+ type: string
+ default: hadoop3
+ envs:
+ description: Additional environment variables to set when running the tests. Should be in JSON format.
+ required: false
+ type: string
+ default: '{}'
+jobs:
+ build:
+ name: "PySpark test on macos: ${{ matrix.modules }}"
+ runs-on: macos-15
+ strategy:
+ fail-fast: false
+ matrix:
+ java:
+ - ${{ inputs.java }}
+ python:
+ - ${{inputs.python}}
+ modules:
+ - >-
+ pyspark-sql, pyspark-resource, pyspark-testing
+ - >-
+ pyspark-core, pyspark-errors, pyspark-streaming
+ - >-
+ pyspark-mllib, pyspark-ml, pyspark-ml-connect
+ - >-
+ pyspark-connect
+ - >-
+ pyspark-pandas
+ - >-
+ pyspark-pandas-slow
+ - >-
+ pyspark-pandas-connect-part0
+ - >-
+ pyspark-pandas-connect-part1
+ - >-
+ pyspark-pandas-connect-part2
+ - >-
+ pyspark-pandas-connect-part3
+ env:
+ MODULES_TO_TEST: ${{ matrix.modules }}
+ PYTHON_TO_TEST: python${{inputs.python}}
+ HADOOP_PROFILE: ${{ inputs.hadoop }}
+ HIVE_PROFILE: hive2.3
+ # GitHub Actions' default miniconda to use in pip packaging test.
+ CONDA_PREFIX: /usr/share/miniconda
+ GITHUB_PREV_SHA: ${{ github.event.before }}
+ SPARK_LOCAL_IP: localhost
+ SKIP_UNIDOC: true
+ SKIP_MIMA: true
+ SKIP_PACKAGING: true
+ METASPACE_SIZE: 1g
+ BRANCH: ${{ inputs.branch }}
+ steps:
+ - name: Checkout Spark repository
+ uses: actions/checkout@v4
+ # In order to fetch changed files
+ with:
+ fetch-depth: 0
+ repository: apache/spark
+ ref: ${{ inputs.branch }}
+ - name: Sync the current branch with the latest in Apache Spark
+ if: github.repository != 'apache/spark'
+ run: |
+ echo "APACHE_SPARK_REF=$(git rev-parse HEAD)" >> $GITHUB_ENV
+ git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF#refs/heads/}
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' merge --no-commit --progress --squash FETCH_HEAD
+ git -c user.name='Apache Spark Test Account' -c user.email='sparktestacc@gmail.com' commit -m "Merged commit" --allow-empty
+ # Cache local repositories. Note that GitHub Actions cache has a 10G limit.
+ - name: Cache SBT and Maven
+ uses: actions/cache@v4
+ with:
+ path: |
+ build/apache-maven-*
+ build/*.jar
+ ~/.sbt
+ key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
+ restore-keys: |
+ build-
+ - name: Cache Coursier local repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.cache/coursier
+ key: pyspark-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
+ restore-keys: |
+ pyspark-coursier-
+ - name: Install Java ${{ matrix.java }}
+ uses: actions/setup-java@v4
+ with:
+ distribution: zulu
+ java-version: ${{ matrix.java }}
+ - name: Install Python packages (Python ${{matrix.python}})
+ run: |
+ python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2'
+ python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0'
+ python${{matrix.python}} -m pip install numpy 'pyarrow>=15.0.0' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly>=4.8' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \
+ python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.28.3' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \
+ python${{matrix.python}} -m pip cache purge && \
+ python${{matrix.python}} -m pip list
+ # Run the tests.
+ - name: Run tests
+ env: ${{ fromJSON(inputs.envs) }}
+ run: |
+ if [[ "$MODULES_TO_TEST" == *"pyspark-errors"* ]]; then
+ export SKIP_PACKAGING=false
+ echo "Python Packaging Tests Enabled!"
+ fi
+ ./dev/run-tests --parallelism 1 --modules "$MODULES_TO_TEST" --python-executables "$PYTHON_TO_TEST"
+ - name: Upload test results to report
+ env: ${{ fromJSON(inputs.envs) }}
+ if: always()
+ uses: actions/upload-artifact@v4
+ with:
+ name: test-results-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }}
+ path: "**/target/test-reports/*.xml"
+ - name: Upload unit tests log files
+ env: ${{ fromJSON(inputs.envs) }}
+ if: ${{ !success() }}
+ uses: actions/upload-artifact@v4
+ with:
+ name: unit-tests-log-${{ matrix.modules }}--${{ matrix.java }}-${{ inputs.hadoop }}-hive2.3-${{ env.PYTHON_TO_TEST }}
+ path: "**/target/unit-tests.log"
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index f270673844551..e2db95083efea 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -25,6 +25,7 @@ on:
jobs:
stale:
+ if: github.repository == 'apache/spark'
runs-on: ubuntu-latest
steps:
- uses: actions/stale@c201d45ef4b0ccbd3bb0616f93bae13e73d0a080 # pin@v1.1.0
diff --git a/.github/workflows/test_report.yml b/.github/workflows/test_report.yml
index c6225e6a1abe5..9ab69af42c818 100644
--- a/.github/workflows/test_report.yml
+++ b/.github/workflows/test_report.yml
@@ -30,14 +30,14 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Download test results to report
- uses: dawidd6/action-download-artifact@09385b76de790122f4da9c82b17bccf858b9557c # pin@v2
+ uses: dawidd6/action-download-artifact@bf251b5aa9c2f7eeb574a96ee720e24f801b7c11 # pin @v6
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
workflow: ${{ github.event.workflow_run.workflow_id }}
commit: ${{ github.event.workflow_run.head_commit.id }}
workflow_conclusion: completed
- name: Publish test report
- uses: scacap/action-surefire-report@482f012643ed0560e23ef605a79e8e87ca081648 # pin@v1
+ uses: scacap/action-surefire-report@a2911bd1a4412ec18dde2d93b1758b3e56d2a880 # pin @v1.8.0
with:
check_name: Report test results
github_token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.gitignore b/.gitignore
index 787eb6180c35c..0a4138ec26948 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,7 @@
*.swp
*~
.java-version
+.python-version
.DS_Store
.ammonite
.bloop
@@ -26,6 +27,7 @@
.scala_dependencies
.settings
.vscode
+artifacts/
/lib/
R-unit-tests.log
R/unit-tests.out
diff --git a/.nojekyll b/.nojekyll
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/LICENSE-binary b/LICENSE-binary
index b6971798e5577..40d28fbe71e6b 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -267,6 +267,7 @@ io.fabric8:kubernetes-model-scheduling
io.fabric8:kubernetes-model-storageclass
io.fabric8:zjsonpatch
io.github.java-diff-utils:java-diff-utils
+io.jsonwebtoken:jjwt-api
io.netty:netty-all
io.netty:netty-buffer
io.netty:netty-codec
@@ -401,7 +402,6 @@ org.xerial.snappy:snappy-java
org.yaml:snakeyaml
oro:oro
stax:stax-api
-xerces:xercesImpl
core/src/main/java/org/apache/spark/util/collection/TimSort.java
core/src/main/resources/org/apache/spark/ui/static/bootstrap*
@@ -421,6 +421,11 @@ Python Software Foundation License
python/pyspark/loose_version.py
+BSD 0-Clause
+------------
+org.tukaani:xz
+
+
BSD 2-Clause
------------
com.github.luben:zstd-jni
@@ -507,7 +512,6 @@ Eclipse Distribution License (EDL) 1.0
com.sun.istack:istack-commons-runtime
jakarta.xml.bind:jakarta.xml.bind-api
org.glassfish.jaxb:jaxb-runtime
-org.glassfish.jaxb:txw2
Eclipse Public License (EPL) 2.0
--------------------------------
@@ -520,12 +524,6 @@ org.glassfish.hk2:hk2-locator
org.glassfish.hk2:hk2-utils
org.glassfish.hk2:osgi-resource-locator
-
-Public Domain
--------------
-org.tukaani:xz
-
-
Creative Commons CC0 1.0 Universal Public Domain Dedication
-----------------------------------------------------------
(see LICENSE-CC0.txt)
diff --git a/NOTICE-binary b/NOTICE-binary
index c4cfe0e9f8b31..3f36596b9d6d6 100644
--- a/NOTICE-binary
+++ b/NOTICE-binary
@@ -448,27 +448,6 @@ which has the following notices:
* Alec Wysoker
* Performance and memory usage improvement
-The binary distribution of this product bundles binaries of
-Xerces2 Java Parser 2.9.1,
-which has the following notices:
- * =========================================================================
- == NOTICE file corresponding to section 4(d) of the Apache License, ==
- == Version 2.0, in this case for the Apache Xerces Java distribution. ==
- =========================================================================
-
- Apache Xerces Java
- Copyright 1999-2007 The Apache Software Foundation
-
- This product includes software developed at
- The Apache Software Foundation (http://www.apache.org/).
-
- Portions of this software were originally based on the following:
- - software copyright (c) 1999, IBM Corporation., http://www.ibm.com.
- - software copyright (c) 1999, Sun Microsystems., http://www.sun.com.
- - voluntary contributions made by Paul Eng on behalf of the
- Apache Software Foundation that were originally developed at iClick, Inc.,
- software copyright (c) 1999.
-
Apache Commons Collections
Copyright 2001-2015 The Apache Software Foundation
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index f7dd261c10fd2..49000c62d1063 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -57,6 +57,7 @@ Collate:
'types.R'
'utils.R'
'window.R'
+ 'zzz.R'
RoxygenNote: 7.1.2
VignetteBuilder: knitr
NeedsCompilation: no
diff --git a/R/pkg/R/functions.R b/R/pkg/R/functions.R
index b91124f96a6fa..9c825a99be180 100644
--- a/R/pkg/R/functions.R
+++ b/R/pkg/R/functions.R
@@ -3965,19 +3965,11 @@ setMethod("row_number",
#' yields unresolved \code{a.b.c}
#' @return Column object wrapping JVM UnresolvedNamedLambdaVariable
#' @keywords internal
-unresolved_named_lambda_var <- function(...) {
- jc <- newJObject(
- "org.apache.spark.sql.Column",
- newJObject(
- "org.apache.spark.sql.catalyst.expressions.UnresolvedNamedLambdaVariable",
- lapply(list(...), function(x) {
- handledCallJStatic(
- "org.apache.spark.sql.catalyst.expressions.UnresolvedNamedLambdaVariable",
- "freshVarName",
- x)
- })
- )
- )
+unresolved_named_lambda_var <- function(name) {
+ jc <- handledCallJStatic(
+ "org.apache.spark.sql.api.python.PythonSQLUtils",
+ "unresolvedNamedLambdaVariable",
+ name)
column(jc)
}
@@ -3990,7 +3982,6 @@ unresolved_named_lambda_var <- function(...) {
#' @return JVM \code{LambdaFunction} object
#' @keywords internal
create_lambda <- function(fun) {
- as_jexpr <- function(x) callJMethod(x@jc, "expr")
# Process function arguments
parameters <- formals(fun)
@@ -4011,22 +4002,18 @@ create_lambda <- function(fun) {
stopifnot(class(result) == "Column")
# Convert both Columns to Scala expressions
- jexpr <- as_jexpr(result)
-
jargs <- handledCallJStatic(
"org.apache.spark.api.python.PythonUtils",
"toSeq",
- handledCallJStatic(
- "java.util.Arrays", "asList", lapply(args, as_jexpr)
- )
+ handledCallJStatic("java.util.Arrays", "asList", lapply(args, function(x) { x@jc }))
)
# Create Scala LambdaFunction
- newJObject(
- "org.apache.spark.sql.catalyst.expressions.LambdaFunction",
- jexpr,
- jargs,
- FALSE
+ handledCallJStatic(
+ "org.apache.spark.sql.api.python.PythonSQLUtils",
+ "lambdaFunction",
+ result@jc,
+ jargs
)
}
@@ -4039,20 +4026,18 @@ create_lambda <- function(fun) {
#' @return a \code{Column} representing name applied to cols with funs
#' @keywords internal
invoke_higher_order_function <- function(name, cols, funs) {
- as_jexpr <- function(x) {
+ as_col <- function(x) {
if (class(x) == "character") {
x <- column(x)
}
- callJMethod(x@jc, "expr")
+ x@jc
}
-
- jexpr <- do.call(newJObject, c(
- paste("org.apache.spark.sql.catalyst.expressions", name, sep = "."),
- lapply(cols, as_jexpr),
- lapply(funs, create_lambda)
- ))
-
- column(newJObject("org.apache.spark.sql.Column", jexpr))
+ jcol <- handledCallJStatic(
+ "org.apache.spark.sql.api.python.PythonSQLUtils",
+ "fn",
+ name,
+ c(lapply(cols, as_col), lapply(funs, create_lambda))) # check varargs invocation
+ column(jcol)
}
#' @details
@@ -4068,7 +4053,7 @@ setMethod("array_aggregate",
signature(x = "characterOrColumn", initialValue = "Column", merge = "function"),
function(x, initialValue, merge, finish = NULL) {
invoke_higher_order_function(
- "ArrayAggregate",
+ "aggregate",
cols = list(x, initialValue),
funs = if (is.null(finish)) {
list(merge)
@@ -4129,7 +4114,7 @@ setMethod("array_exists",
signature(x = "characterOrColumn", f = "function"),
function(x, f) {
invoke_higher_order_function(
- "ArrayExists",
+ "exists",
cols = list(x),
funs = list(f)
)
@@ -4145,7 +4130,7 @@ setMethod("array_filter",
signature(x = "characterOrColumn", f = "function"),
function(x, f) {
invoke_higher_order_function(
- "ArrayFilter",
+ "filter",
cols = list(x),
funs = list(f)
)
@@ -4161,7 +4146,7 @@ setMethod("array_forall",
signature(x = "characterOrColumn", f = "function"),
function(x, f) {
invoke_higher_order_function(
- "ArrayForAll",
+ "forall",
cols = list(x),
funs = list(f)
)
@@ -4291,7 +4276,7 @@ setMethod("array_sort",
column(callJStatic("org.apache.spark.sql.functions", "array_sort", x@jc))
} else {
invoke_higher_order_function(
- "ArraySort",
+ "array_sort",
cols = list(x),
funs = list(comparator)
)
@@ -4309,7 +4294,7 @@ setMethod("array_transform",
signature(x = "characterOrColumn", f = "function"),
function(x, f) {
invoke_higher_order_function(
- "ArrayTransform",
+ "transform",
cols = list(x),
funs = list(f)
)
@@ -4374,7 +4359,7 @@ setMethod("arrays_zip_with",
signature(x = "characterOrColumn", y = "characterOrColumn", f = "function"),
function(x, y, f) {
invoke_higher_order_function(
- "ZipWith",
+ "zip_with",
cols = list(x, y),
funs = list(f)
)
@@ -4447,7 +4432,7 @@ setMethod("map_filter",
signature(x = "characterOrColumn", f = "function"),
function(x, f) {
invoke_higher_order_function(
- "MapFilter",
+ "map_filter",
cols = list(x),
funs = list(f))
})
@@ -4504,7 +4489,7 @@ setMethod("transform_keys",
signature(x = "characterOrColumn", f = "function"),
function(x, f) {
invoke_higher_order_function(
- "TransformKeys",
+ "transform_keys",
cols = list(x),
funs = list(f)
)
@@ -4521,7 +4506,7 @@ setMethod("transform_values",
signature(x = "characterOrColumn", f = "function"),
function(x, f) {
invoke_higher_order_function(
- "TransformValues",
+ "transform_values",
cols = list(x),
funs = list(f)
)
@@ -4552,7 +4537,7 @@ setMethod("map_zip_with",
signature(x = "characterOrColumn", y = "characterOrColumn", f = "function"),
function(x, y, f) {
invoke_higher_order_function(
- "MapZipWith",
+ "map_zip_with",
cols = list(x, y),
funs = list(f)
)
diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 0be7e5da24d23..1b5faad376eaa 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -403,7 +403,6 @@ sparkR.session <- function(
sparkPackages = "",
enableHiveSupport = TRUE,
...) {
-
sparkConfigMap <- convertNamedListToEnv(sparkConfig)
namedParams <- list(...)
if (length(namedParams) > 0) {
diff --git a/connector/docker/spark-test/base/Dockerfile b/R/pkg/R/zzz.R
similarity index 64%
rename from connector/docker/spark-test/base/Dockerfile
rename to R/pkg/R/zzz.R
index c397abc211e24..947bd543b75e0 100644
--- a/connector/docker/spark-test/base/Dockerfile
+++ b/R/pkg/R/zzz.R
@@ -14,15 +14,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
+# zzz.R - package startup message
-FROM ubuntu:20.04
-
-# Upgrade package index
-# install a few other useful packages plus Open Java 17
-# Remove unneeded /var/lib/apt/lists/* after install to reduce the
-# docker image size (by ~30MB)
-RUN apt-get update && \
- apt-get install -y less openjdk-17-jre-headless iproute2 vim-tiny sudo openssh-server && \
- rm -rf /var/lib/apt/lists/*
-
-ENV SPARK_HOME /opt/spark
+.onAttach <- function(...) {
+ if (Sys.getenv("SPARKR_SUPPRESS_DEPRECATION_WARNING") == "") {
+ packageStartupMessage(
+ paste0(
+ "Warning: ",
+ "SparkR is deprecated in Apache Spark 4.0.0 and will be removed in a future release. ",
+ "To continue using Spark in R, we recommend using sparklyr instead: ",
+ "https://spark.posit.co/get-started/"
+ )
+ )
+ }
+}
diff --git a/R/pkg/README.md b/R/pkg/README.md
index da9f042b4fded..c05a75812245c 100644
--- a/R/pkg/README.md
+++ b/R/pkg/README.md
@@ -1,4 +1,4 @@
-# R on Spark
+# R on Spark (deprecated)
SparkR is an R package that provides a light-weight frontend to use Spark from R.
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R b/R/pkg/tests/fulltests/test_sparkSQL.R
index c44924e55087f..c93b92edbff8e 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -4152,7 +4152,8 @@ test_that("catalog APIs, listTables, getTable, listColumns, listFunctions, funct
c <- listColumns("cars")
expect_equal(nrow(c), 2)
expect_equal(colnames(c),
- c("name", "description", "dataType", "nullable", "isPartition", "isBucket"))
+ c("name", "description", "dataType", "nullable", "isPartition", "isBucket",
+ "isCluster"))
expect_equal(collect(c)[[1]][[1]], "speed")
expect_error(listColumns("zxwtyswklpf", "default"),
"[TABLE_OR_VIEW_NOT_FOUND]*`spark_catalog`.`default`.`zxwtyswklpf`*")
diff --git a/R/pkg/tests/fulltests/test_streaming.R b/R/pkg/tests/fulltests/test_streaming.R
index 88114f8bd82b8..74734746c129f 100644
--- a/R/pkg/tests/fulltests/test_streaming.R
+++ b/R/pkg/tests/fulltests/test_streaming.R
@@ -147,8 +147,7 @@ test_that("Unsupported operation", {
# memory sink without aggregation
df <- read.stream("json", path = jsonDir, schema = schema, maxFilesPerTrigger = 1)
expect_error(write.stream(df, "memory", queryName = "people", outputMode = "complete"),
- paste0(".*(start : analysis error - Complete output mode not supported when there ",
- "are no streaming aggregations on streaming DataFrames/Datasets).*"))
+ ".*analysis error.*complete.*not supported.*no streaming aggregations*")
})
test_that("Terminated by error", {
diff --git a/R/pkg/vignettes/sparkr-vignettes.Rmd b/R/pkg/vignettes/sparkr-vignettes.Rmd
index 4a510763afb6c..338b74110fb65 100644
--- a/R/pkg/vignettes/sparkr-vignettes.Rmd
+++ b/R/pkg/vignettes/sparkr-vignettes.Rmd
@@ -52,6 +52,8 @@ old_java_opt <- Sys.getenv("_JAVA_OPTIONS")
Sys.setenv("_JAVA_OPTIONS" = paste("-XX:-UsePerfData", old_java_opt, sep = " "))
```
+SparkR is deprecated from Apache Spark 4.0.0 and will be removed in a future version.
+
## Overview
SparkR is an R package that provides a light-weight frontend to use Apache Spark from R. With Spark `r packageVersion("SparkR")`, SparkR provides a distributed data frame implementation that supports data processing operations like selection, filtering, aggregation etc. and distributed machine learning using [MLlib](https://spark.apache.org/mllib/).
diff --git a/R/run-tests.sh b/R/run-tests.sh
index 90a60eda03871..3a90b44c2b659 100755
--- a/R/run-tests.sh
+++ b/R/run-tests.sh
@@ -30,9 +30,9 @@ if [[ $(echo $SPARK_AVRO_JAR_PATH | wc -l) -eq 1 ]]; then
fi
if [ -z "$SPARK_JARS" ]; then
- SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+ SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
else
- SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
+ SPARKR_SUPPRESS_DEPRECATION_WARNING=1 SPARK_TESTING=1 NOT_CRAN=true $FWDIR/../bin/spark-submit --jars $SPARK_JARS --driver-java-options "-Dlog4j.configurationFile=file:$FWDIR/log4j2.properties" --conf spark.hadoop.fs.defaultFS="file:///" --conf spark.driver.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" --conf spark.executor.extraJavaOptions="-Dio.netty.tryReflectionSetAccessible=true -Xss4M" $FWDIR/pkg/tests/run-all.R 2>&1 | tee -a $LOGFILE
fi
FAILED=$((PIPESTATUS[0]||$FAILED))
diff --git a/README.md b/README.md
index b9a20075f6a17..552b71215cb92 100644
--- a/README.md
+++ b/README.md
@@ -7,7 +7,8 @@ rich set of higher-level tools including Spark SQL for SQL and DataFrames,
pandas API on Spark for pandas workloads, MLlib for machine learning, GraphX for graph processing,
and Structured Streaming for stream processing.
-
+- Official version:
+- Development version:
[![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_main.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_main.yml)
[![PySpark Coverage](https://codecov.io/gh/apache/spark/branch/master/graph/badge.svg)](https://codecov.io/gh/apache/spark)
diff --git a/assembly/README b/assembly/README
index ad1305c5b4d56..10c8254ae153e 100644
--- a/assembly/README
+++ b/assembly/README
@@ -9,4 +9,4 @@ This module is off by default. To activate it specify the profile in the command
If you need to build an assembly for a different version of Hadoop the
hadoop-version system property needs to be set as in this example:
- -Dhadoop.version=3.4.0
+ -Dhadoop.version=3.4.1
diff --git a/assembly/pom.xml b/assembly/pom.xml
index 9377849cf1cdc..17bb81fa023ba 100644
--- a/assembly/pom.xml
+++ b/assembly/pom.xml
@@ -109,11 +109,27 @@
${project.version}provided
+
+
+ org.apache.spark
+ spark-connect-client-jvm_${scala.binary.version}
+ ${project.version}
+
+
+ org.apache.spark
+ spark-connect-shims_${scala.binary.version}
+
+
+ provided
+ com.google.guava
@@ -159,6 +175,44 @@
+
+
+ org.codehaus.mojo
+ exec-maven-plugin
+
+
+ copy-connect-client-repl-jars
+ package
+
+ exec
+
+
+ cp
+
+ -r
+ ${basedir}/../connector/connect/client/jvm/target/connect-repl
+ ${basedir}/target/scala-${scala.binary.version}/jars/
+
+
+
+
+ copy-connect-client-jar
+ package
+
+ exec
+
+
+ cp
+
+ ${basedir}/../connector/connect/client/jvm/target/spark-connect-client-jvm_${scala.binary.version}-${project.version}.jar
+ ${basedir}/target/scala-${scala.binary.version}/jars/connect-repl
+
+
+
+
+
@@ -291,6 +345,14 @@
+
+
+ jjwt
+
+ compile
+
+
+
diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index a137a2fba52ee..efbd63a3037d6 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -261,18 +261,18 @@ Examples:
$0 -m -t testing build
- Build PySpark docker image
- $0 -r docker.io/myrepo -t v3.4.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
+ $0 -r docker.io/myrepo -t v4.0.0 -p kubernetes/dockerfiles/spark/bindings/python/Dockerfile build
- - Build and push image with tag "v3.4.0" to docker.io/myrepo
- $0 -r docker.io/myrepo -t v3.4.0 build
- $0 -r docker.io/myrepo -t v3.4.0 push
+ - Build and push image with tag "v4.0.0" to docker.io/myrepo
+ $0 -r docker.io/myrepo -t v4.0.0 build
+ $0 -r docker.io/myrepo -t v4.0.0 push
- - Build and push Java11-based image with tag "v3.4.0" to docker.io/myrepo
- $0 -r docker.io/myrepo -t v3.4.0 -b java_image_tag=11-jre build
- $0 -r docker.io/myrepo -t v3.4.0 push
+ - Build and push Java17-based image with tag "v4.0.0" to docker.io/myrepo
+ $0 -r docker.io/myrepo -t v4.0.0 -b java_image_tag=17 build
+ $0 -r docker.io/myrepo -t v4.0.0 push
- Build and push image for multiple archs to docker.io/myrepo
- $0 -r docker.io/myrepo -t v3.4.0 -X build
+ $0 -r docker.io/myrepo -t v4.0.0 -X build
# Note: buildx, which does cross building, needs to do the push during build
# So there is no separate push step with -X
diff --git a/bin/spark-shell b/bin/spark-shell
index e920137974980..8411158187260 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -34,7 +34,7 @@ fi
export _SPARK_CMD_USAGE="Usage: ./bin/spark-shell [options]
-Scala REPL options:
+Scala REPL options, Spark Classic only:
-I preload , enforcing line-by-line interpretation"
# SPARK-4161: scala does not assume use of the java classpath,
@@ -45,6 +45,9 @@ Scala REPL options:
SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Dscala.usejavacp=true"
function main() {
+ export SPARK_SCALA_SHELL=1
+ # In case of Spark Connect shell, the main class (and resource) is replaced in
+ # SparkSubmitCommandBuilder.
if $cygwin; then
# Workaround for issue involving JLine and Cygwin
# (see http://sourceforge.net/p/jline/bugs/40/).
diff --git a/binder/Dockerfile b/binder/Dockerfile
new file mode 100644
index 0000000000000..6e3dd9155fb7a
--- /dev/null
+++ b/binder/Dockerfile
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM python:3.10-slim
+# install the notebook package
+RUN pip install --no-cache notebook jupyterlab
+
+# create user with a home directory
+ARG NB_USER
+ARG NB_UID
+ENV USER ${NB_USER}
+ENV HOME /home/${NB_USER}
+
+RUN adduser --disabled-password \
+ --gecos "Default user" \
+ --uid ${NB_UID} \
+ ${NB_USER}
+WORKDIR ${HOME}
+USER ${USER}
+
+# Make sure the contents of our repo are in ${HOME}
+COPY . ${HOME}
+USER root
+RUN chown -R ${NB_UID} ${HOME}
+RUN apt-get update && apt-get install -y openjdk-17-jre git coreutils
+USER ${NB_USER}
+
+RUN binder/postBuild
+
diff --git a/binder/apt.txt b/binder/apt.txt
deleted file mode 100644
index aa441a15db3bd..0000000000000
--- a/binder/apt.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-openjdk-17-jre
-git
diff --git a/binder/postBuild b/binder/postBuild
index 26e23b5a940ae..0b326f62e8f51 100644
--- a/binder/postBuild
+++ b/binder/postBuild
@@ -26,7 +26,7 @@ set -o pipefail
set -e
VERSION=$(python -c "exec(open('python/pyspark/version.py').read()); print(__version__)")
-TAG=$(git describe --tags --exact-match 2>/dev/null)
+TAG=$(git describe --tags --exact-match 2> /dev/null || true)
# If a commit is tagged, exactly specified version of pyspark should be installed to avoid
# a kind of accident that an old version of pyspark is installed in the live notebook environment.
diff --git a/build/mvn b/build/mvn
index 3735461562e54..fef589fc03476 100755
--- a/build/mvn
+++ b/build/mvn
@@ -56,9 +56,9 @@ install_app() {
local binary="${_DIR}/$6"
local remote_tarball="${mirror_host}/${url_path}${url_query}"
local local_checksum="${local_tarball}.${checksum_suffix}"
- local remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}"
+ local remote_checksum="${mirror_host}/${url_path}.${checksum_suffix}${url_query}"
- local curl_opts="--silent --show-error -L"
+ local curl_opts="--retry 3 --silent --show-error -L"
local wget_opts="--no-verbose"
if [ ! -f "$binary" ]; then
@@ -115,6 +115,10 @@ function version { echo "$@" | awk -F. '{ printf("%03d%03d%03d\n", $1,$2,$3); }'
# install maven under the build/ folder if needed.
install_mvn() {
local MVN_VERSION=`grep "" "${_DIR}/../pom.xml" | head -n1 | awk -F '[<>]' '{print $3}'`
+ MVN_BIN="${_DIR}/apache-maven-${MVN_VERSION}/bin/mvn"
+ if [ -f "$MVN_BIN" ]; then
+ return
+ fi
MVN_BIN="$(command -v mvn)"
if [ "$MVN_BIN" ]; then
local MVN_DETECTED_VERSION="$(mvn --version | head -n1 | awk '{print $3}')"
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java
index 13a9d89f4705c..7f8d6c58aec7e 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java
@@ -255,7 +255,8 @@ public Iterator iterator() {
iteratorTracker.add(new WeakReference<>(it));
return it;
} catch (Exception e) {
- throw Throwables.propagate(e);
+ Throwables.throwIfUnchecked(e);
+ throw new RuntimeException(e);
}
}
};
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
index 69757fdc65d68..29ed37ffa44e5 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java
@@ -127,7 +127,7 @@ public boolean hasNext() {
try {
close();
} catch (IOException ioe) {
- throw Throwables.propagate(ioe);
+ throw new RuntimeException(ioe);
}
}
return next != null;
@@ -151,7 +151,8 @@ public T next() {
next = null;
return ret;
} catch (Exception e) {
- throw Throwables.propagate(e);
+ Throwables.throwIfUnchecked(e);
+ throw new RuntimeException(e);
}
}
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java
index dc7ad0be5c007..4bc2b233fe12d 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java
@@ -287,7 +287,8 @@ public Iterator iterator() {
iteratorTracker.add(new WeakReference<>(it));
return it;
} catch (Exception e) {
- throw Throwables.propagate(e);
+ Throwables.throwIfUnchecked(e);
+ throw new RuntimeException(e);
}
}
};
diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java
index a98b0482e35cc..e350ddc2d445a 100644
--- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java
+++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java
@@ -113,7 +113,7 @@ public boolean hasNext() {
try {
close();
} catch (IOException ioe) {
- throw Throwables.propagate(ioe);
+ throw new RuntimeException(ioe);
}
}
return next != null;
@@ -137,7 +137,8 @@ public T next() {
next = null;
return ret;
} catch (Exception e) {
- throw Throwables.propagate(e);
+ Throwables.throwIfUnchecked(e);
+ throw new RuntimeException(e);
}
}
diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml
index cdb5bd72158a1..cbe4836b58da5 100644
--- a/common/network-common/pom.xml
+++ b/common/network-common/pom.xml
@@ -194,6 +194,16 @@
mockito-coretest
+
+ net.bytebuddy
+ byte-buddy
+ test
+
+
+ net.bytebuddy
+ byte-buddy-agent
+ test
+
diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java
index 4c144a73a9299..a9df47645d36f 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java
@@ -290,9 +290,11 @@ public void onFailure(Throwable e) {
try {
return result.get(timeoutMs, TimeUnit.MILLISECONDS);
} catch (ExecutionException e) {
- throw Throwables.propagate(e.getCause());
+ Throwables.throwIfUnchecked(e.getCause());
+ throw new RuntimeException(e.getCause());
} catch (Exception e) {
- throw Throwables.propagate(e);
+ Throwables.throwIfUnchecked(e);
+ throw new RuntimeException(e);
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
index e1f19f956cc0a..d64b8c8f838e9 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
@@ -342,7 +342,8 @@ public void operationComplete(final Future handshakeFuture) {
logger.error("Exception while bootstrapping client after {} ms", e,
MDC.of(LogKeys.BOOTSTRAP_TIME$.MODULE$, bootstrapTimeMs));
client.close();
- throw Throwables.propagate(e);
+ Throwables.throwIfUnchecked(e);
+ throw new RuntimeException(e);
}
long postBootstrap = System.nanoTime();
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java
index 08e2c084fe67b..2e9ccd0e0ad21 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthClientBootstrap.java
@@ -22,7 +22,6 @@
import java.security.GeneralSecurityException;
import java.util.concurrent.TimeoutException;
-import com.google.common.base.Throwables;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
import io.netty.channel.Channel;
@@ -80,7 +79,7 @@ public void doBootstrap(TransportClient client, Channel channel) {
doSparkAuth(client, channel);
client.setClientId(appId);
} catch (GeneralSecurityException | IOException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
} catch (RuntimeException e) {
// There isn't a good exception that can be caught here to know whether it's really
// OK to switch back to SASL (because the server doesn't speak the new protocol). So
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java
index 65367743e24f9..087e3d21e22bb 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java
@@ -132,7 +132,8 @@ protected boolean doAuthChallenge(
try {
engine.close();
} catch (Exception e) {
- throw Throwables.propagate(e);
+ Throwables.throwIfUnchecked(e);
+ throw new RuntimeException(e);
}
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java
index 355c552720185..33494aee4444d 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java
@@ -17,32 +17,12 @@
package org.apache.spark.network.crypto;
-import com.google.common.annotations.VisibleForTesting;
-import com.google.crypto.tink.subtle.Hex;
-import com.google.crypto.tink.subtle.Hkdf;
import io.netty.channel.Channel;
-import javax.crypto.spec.SecretKeySpec;
import java.io.IOException;
-import java.nio.charset.StandardCharsets;
import java.security.GeneralSecurityException;
interface TransportCipher {
String getKeyId() throws GeneralSecurityException;
void addToChannel(Channel channel) throws IOException, GeneralSecurityException;
}
-
-class TransportCipherUtil {
- /*
- * This method is used for testing to verify key derivation.
- */
- @VisibleForTesting
- static String getKeyId(SecretKeySpec key) throws GeneralSecurityException {
- byte[] keyIdBytes = Hkdf.computeHkdf("HmacSha256",
- key.getEncoded(),
- null,
- "keyID".getBytes(StandardCharsets.UTF_8),
- 32);
- return Hex.encode(keyIdBytes);
- }
-}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipherUtil.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipherUtil.java
new file mode 100644
index 0000000000000..1df2732f240cc
--- /dev/null
+++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipherUtil.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.network.crypto;
+
+import java.nio.charset.StandardCharsets;
+import java.security.GeneralSecurityException;
+import javax.crypto.spec.SecretKeySpec;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.crypto.tink.subtle.Hex;
+import com.google.crypto.tink.subtle.Hkdf;
+
+class TransportCipherUtil {
+ /**
+ * This method is used for testing to verify key derivation.
+ */
+ @VisibleForTesting
+ static String getKeyId(SecretKeySpec key) throws GeneralSecurityException {
+ byte[] keyIdBytes = Hkdf.computeHkdf("HmacSha256",
+ key.getEncoded(),
+ null,
+ "keyID".getBytes(StandardCharsets.UTF_8),
+ 32);
+ return Hex.encode(keyIdBytes);
+ }
+}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslClient.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslClient.java
index 3600c1045dbf4..a61b1c3c0c416 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslClient.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslClient.java
@@ -29,7 +29,6 @@
import javax.security.sasl.SaslClient;
import javax.security.sasl.SaslException;
-import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import org.apache.spark.internal.SparkLogger;
@@ -62,7 +61,7 @@ public SparkSaslClient(String secretKeyId, SecretKeyHolder secretKeyHolder, bool
this.saslClient = Sasl.createSaslClient(new String[] { DIGEST }, null, null, DEFAULT_REALM,
saslProps, new ClientCallbackHandler());
} catch (SaslException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
}
}
@@ -72,7 +71,7 @@ public synchronized byte[] firstToken() {
try {
return saslClient.evaluateChallenge(new byte[0]);
} catch (SaslException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
}
} else {
return new byte[0];
@@ -98,7 +97,7 @@ public synchronized byte[] response(byte[] token) {
try {
return saslClient != null ? saslClient.evaluateChallenge(token) : new byte[0];
} catch (SaslException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java
index b897650afe832..f32fd5145c7c5 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java
@@ -31,7 +31,6 @@
import java.util.Map;
import com.google.common.base.Preconditions;
-import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableMap;
import io.netty.buffer.ByteBuf;
import io.netty.buffer.Unpooled;
@@ -94,7 +93,7 @@ public SparkSaslServer(
this.saslServer = Sasl.createSaslServer(DIGEST, null, DEFAULT_REALM, saslProps,
new DigestCallbackHandler());
} catch (SaslException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
}
}
@@ -119,7 +118,7 @@ public synchronized byte[] response(byte[] token) {
try {
return saslServer != null ? saslServer.evaluateResponse(token) : new byte[0];
} catch (SaslException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/shuffledb/LevelDBIterator.java b/common/network-common/src/main/java/org/apache/spark/network/shuffledb/LevelDBIterator.java
index 5796e34a6f05e..2ac549775449a 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/shuffledb/LevelDBIterator.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/shuffledb/LevelDBIterator.java
@@ -17,8 +17,6 @@
package org.apache.spark.network.shuffledb;
-import com.google.common.base.Throwables;
-
import java.io.IOException;
import java.util.Map;
import java.util.NoSuchElementException;
@@ -47,7 +45,7 @@ public boolean hasNext() {
try {
close();
} catch (IOException ioe) {
- throw Throwables.propagate(ioe);
+ throw new RuntimeException(ioe);
}
}
return next != null;
diff --git a/common/network-common/src/main/java/org/apache/spark/network/shuffledb/RocksDB.java b/common/network-common/src/main/java/org/apache/spark/network/shuffledb/RocksDB.java
index d33895d6c2d62..2737ab8ed754c 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/shuffledb/RocksDB.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/shuffledb/RocksDB.java
@@ -19,7 +19,6 @@
import java.io.IOException;
-import com.google.common.base.Throwables;
import org.rocksdb.RocksDBException;
/**
@@ -37,7 +36,7 @@ public void put(byte[] key, byte[] value) {
try {
db.put(key, value);
} catch (RocksDBException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
}
}
@@ -46,7 +45,7 @@ public byte[] get(byte[] key) {
try {
return db.get(key);
} catch (RocksDBException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
}
}
@@ -55,7 +54,7 @@ public void delete(byte[] key) {
try {
db.delete(key);
} catch (RocksDBException e) {
- throw Throwables.propagate(e);
+ throw new RuntimeException(e);
}
}
diff --git a/common/network-common/src/main/java/org/apache/spark/network/shuffledb/RocksDBIterator.java b/common/network-common/src/main/java/org/apache/spark/network/shuffledb/RocksDBIterator.java
index 78562f91a4b75..829a7ded6330b 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/shuffledb/RocksDBIterator.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/shuffledb/RocksDBIterator.java
@@ -22,7 +22,6 @@
import java.util.Map;
import java.util.NoSuchElementException;
-import com.google.common.base.Throwables;
import org.rocksdb.RocksIterator;
/**
@@ -52,7 +51,7 @@ public boolean hasNext() {
try {
close();
} catch (IOException ioe) {
- throw Throwables.propagate(ioe);
+ throw new RuntimeException(ioe);
}
}
return next != null;
diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml
index 0f7036ef746cc..49e6e08476151 100644
--- a/common/network-shuffle/pom.xml
+++ b/common/network-shuffle/pom.xml
@@ -113,6 +113,16 @@
mockito-coretest
+
+ net.bytebuddy
+ byte-buddy
+ test
+
+
+ net.bytebuddy
+ byte-buddy-agent
+ test
+ commons-iocommons-io
diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java
index f9c0c60c2f2c6..62fcda701d948 100644
--- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java
+++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java
@@ -19,10 +19,7 @@
import java.io.*;
import java.util.concurrent.TimeUnit;
-import java.util.zip.Adler32;
-import java.util.zip.CRC32;
-import java.util.zip.CheckedInputStream;
-import java.util.zip.Checksum;
+import java.util.zip.*;
import com.google.common.io.ByteStreams;
@@ -66,6 +63,13 @@ private static Checksum[] getChecksumsByAlgorithm(int num, String algorithm) {
}
}
+ case "CRC32C" -> {
+ checksums = new CRC32C[num];
+ for (int i = 0; i < num; i++) {
+ checksums[i] = new CRC32C();
+ }
+ }
+
default -> throw new UnsupportedOperationException(
"Unsupported shuffle checksum algorithm: " + algorithm);
}
diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml
index a5ef9847859a7..cf15301273303 100644
--- a/common/unsafe/pom.xml
+++ b/common/unsafe/pom.xml
@@ -104,6 +104,16 @@
mockito-coretest
+
+ net.bytebuddy
+ byte-buddy
+ test
+
+
+ net.bytebuddy
+ byte-buddy-agent
+ test
+ org.scalacheckscalacheck_${scala.binary.version}
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java
index b9868ca665a65..97c8bbe562aff 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java
@@ -17,6 +17,7 @@
package org.apache.spark.sql.catalyst.util;
import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
@@ -26,15 +27,15 @@
import org.apache.spark.unsafe.UTF8StringBuilder;
import org.apache.spark.unsafe.types.UTF8String;
-import static org.apache.spark.unsafe.Platform.BYTE_ARRAY_OFFSET;
-import static org.apache.spark.unsafe.Platform.copyMemory;
import static org.apache.spark.unsafe.types.UTF8String.CodePointIteratorType;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
+import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
/**
@@ -48,19 +49,28 @@ public class CollationAwareUTF8String {
*/
private static final int MATCH_NOT_FOUND = -1;
+ /**
+ * `COMBINED_ASCII_SMALL_I_COMBINING_DOT` is an internal representation of the combined
+ * lowercase code point for ASCII lowercase letter i with an additional combining dot character
+ * (U+0307). This integer value is not a valid code point itself, but rather an artificial code
+ * point marker used to represent the two lowercase characters that are the result of converting
+ * the uppercase Turkish dotted letter I with a combining dot character (U+0130) to lowercase.
+ */
+ private static final int COMBINED_ASCII_SMALL_I_COMBINING_DOT =
+ SpecialCodePointConstants.ASCII_SMALL_I << 16 | SpecialCodePointConstants.COMBINING_DOT;
+
/**
* Returns whether the target string starts with the specified prefix, starting from the
* specified position (0-based index referring to character position in UTF8String), with respect
- * to the UTF8_LCASE collation. The method assumes that the prefix is already lowercased
- * prior to method call to avoid the overhead of calling .toLowerCase() multiple times on the
- * same prefix string.
+ * to the UTF8_LCASE collation. The method assumes that the prefix is already lowercased prior
+ * to method call to avoid the overhead of lowercasing the same prefix string multiple times.
*
* @param target the string to be searched in
* @param lowercasePattern the string to be searched for
* @param startPos the start position for searching (in the target string)
* @return whether the target string starts with the specified prefix in UTF8_LCASE
*/
- public static boolean lowercaseMatchFrom(
+ private static boolean lowercaseMatchFrom(
final UTF8String target,
final UTF8String lowercasePattern,
int startPos) {
@@ -86,12 +96,44 @@ private static int lowercaseMatchLengthFrom(
final UTF8String lowercasePattern,
int startPos) {
assert startPos >= 0;
- for (int len = 0; len <= target.numChars() - startPos; ++len) {
- if (target.substring(startPos, startPos + len).toLowerCase().equals(lowercasePattern)) {
- return len;
+ // Use code point iterators for efficient string search.
+ Iterator targetIterator = target.codePointIterator();
+ Iterator patternIterator = lowercasePattern.codePointIterator();
+ // Skip to startPos in the target string.
+ for (int i = 0; i < startPos; ++i) {
+ if (targetIterator.hasNext()) {
+ targetIterator.next();
+ } else {
+ return MATCH_NOT_FOUND;
}
}
- return MATCH_NOT_FOUND;
+ // Compare the characters in the target and pattern strings.
+ int matchLength = 0, codePointBuffer = -1, targetCodePoint, patternCodePoint;
+ while ((targetIterator.hasNext() || codePointBuffer != -1) && patternIterator.hasNext()) {
+ if (codePointBuffer != -1) {
+ targetCodePoint = codePointBuffer;
+ codePointBuffer = -1;
+ } else {
+ // Use buffered lowercase code point iteration to handle one-to-many case mappings.
+ targetCodePoint = getLowercaseCodePoint(targetIterator.next());
+ if (targetCodePoint == COMBINED_ASCII_SMALL_I_COMBINING_DOT) {
+ targetCodePoint = SpecialCodePointConstants.ASCII_SMALL_I;
+ codePointBuffer = SpecialCodePointConstants.COMBINING_DOT;
+ }
+ ++matchLength;
+ }
+ patternCodePoint = patternIterator.next();
+ if (targetCodePoint != patternCodePoint) {
+ return MATCH_NOT_FOUND;
+ }
+ }
+ // If the pattern string has more characters, or the match is found at the middle of a
+ // character that maps to multiple characters in lowercase, then match is not found.
+ if (patternIterator.hasNext() || codePointBuffer != -1) {
+ return MATCH_NOT_FOUND;
+ }
+ // If all characters are equal, return the length of the match in the target string.
+ return matchLength;
}
/**
@@ -123,15 +165,14 @@ private static int lowercaseFind(
* Returns whether the target string ends with the specified suffix, ending at the specified
* position (0-based index referring to character position in UTF8String), with respect to the
* UTF8_LCASE collation. The method assumes that the suffix is already lowercased prior
- * to method call to avoid the overhead of calling .toLowerCase() multiple times on the same
- * suffix string.
+ * to method call to avoid the overhead of lowercasing the same suffix string multiple times.
*
* @param target the string to be searched in
* @param lowercasePattern the string to be searched for
* @param endPos the end position for searching (in the target string)
* @return whether the target string ends with the specified suffix in lowercase
*/
- public static boolean lowercaseMatchUntil(
+ private static boolean lowercaseMatchUntil(
final UTF8String target,
final UTF8String lowercasePattern,
int endPos) {
@@ -156,13 +197,45 @@ private static int lowercaseMatchLengthUntil(
final UTF8String target,
final UTF8String lowercasePattern,
int endPos) {
- assert endPos <= target.numChars();
- for (int len = 0; len <= endPos; ++len) {
- if (target.substring(endPos - len, endPos).toLowerCase().equals(lowercasePattern)) {
- return len;
+ assert endPos >= 0;
+ // Use code point iterators for efficient string search.
+ Iterator targetIterator = target.reverseCodePointIterator();
+ Iterator patternIterator = lowercasePattern.reverseCodePointIterator();
+ // Skip to startPos in the target string.
+ for (int i = endPos; i < target.numChars(); ++i) {
+ if (targetIterator.hasNext()) {
+ targetIterator.next();
+ } else {
+ return MATCH_NOT_FOUND;
}
}
- return MATCH_NOT_FOUND;
+ // Compare the characters in the target and pattern strings.
+ int matchLength = 0, codePointBuffer = -1, targetCodePoint, patternCodePoint;
+ while ((targetIterator.hasNext() || codePointBuffer != -1) && patternIterator.hasNext()) {
+ if (codePointBuffer != -1) {
+ targetCodePoint = codePointBuffer;
+ codePointBuffer = -1;
+ } else {
+ // Use buffered lowercase code point iteration to handle one-to-many case mappings.
+ targetCodePoint = getLowercaseCodePoint(targetIterator.next());
+ if (targetCodePoint == COMBINED_ASCII_SMALL_I_COMBINING_DOT) {
+ targetCodePoint = SpecialCodePointConstants.COMBINING_DOT;
+ codePointBuffer = SpecialCodePointConstants.ASCII_SMALL_I;
+ }
+ ++matchLength;
+ }
+ patternCodePoint = patternIterator.next();
+ if (targetCodePoint != patternCodePoint) {
+ return MATCH_NOT_FOUND;
+ }
+ }
+ // If the pattern string has more characters, or the match is found at the middle of a
+ // character that maps to multiple characters in lowercase, then match is not found.
+ if (patternIterator.hasNext() || codePointBuffer != -1) {
+ return MATCH_NOT_FOUND;
+ }
+ // If all characters are equal, return the length of the match in the target string.
+ return matchLength;
}
/**
@@ -191,10 +264,9 @@ private static int lowercaseRFind(
}
/**
- * Lowercase UTF8String comparison used for UTF8_LCASE collation. While the default
- * UTF8String comparison is equivalent to a.toLowerCase().binaryCompare(b.toLowerCase()), this
- * method uses code points to compare the strings in a case-insensitive manner using ICU rules,
- * as well as handling special rules for one-to-many case mappings (see: lowerCaseCodePoints).
+ * Lowercase UTF8String comparison used for UTF8_LCASE collation. This method uses lowercased
+ * code points to compare the strings in a case-insensitive manner using ICU rules, taking into
+ * account special rules for one-to-many case mappings (see: lowerCaseCodePoints).
*
* @param left The first UTF8String to compare.
* @param right The second UTF8String to compare.
@@ -238,115 +310,82 @@ private static int compareLowerCaseSlow(final UTF8String left, final UTF8String
return lowerCaseCodePoints(left).binaryCompare(lowerCaseCodePoints(right));
}
- /*
+ /**
* Performs string replacement for ICU collations by searching for instances of the search
- * string in the `src` string, with respect to the specified collation, and then replacing
+ * string in the `target` string, with respect to the specified collation, and then replacing
* them with the replace string. The method returns a new UTF8String with all instances of the
* search string replaced using the replace string. Similar to UTF8String.findInSet behavior
- * used for UTF8_BINARY, the method returns the `src` string if the `search` string is empty.
+ * used for UTF8_BINARY, the method returns the `target` string if the `search` string is empty.
*
- * @param src the string to be searched in
+ * @param target the string to be searched in
* @param search the string to be searched for
* @param replace the string to be used as replacement
* @param collationId the collation ID to use for string search
* @return the position of the first occurrence of `match` in `set`
*/
- public static UTF8String replace(final UTF8String src, final UTF8String search,
+ public static UTF8String replace(final UTF8String target, final UTF8String search,
final UTF8String replace, final int collationId) {
// This collation aware implementation is based on existing implementation on UTF8String
- if (src.numBytes() == 0 || search.numBytes() == 0) {
- return src;
- }
-
- StringSearch stringSearch = CollationFactory.getStringSearch(src, search, collationId);
-
- // Find the first occurrence of the search string.
- int end = stringSearch.next();
- if (end == StringSearch.DONE) {
- // Search string was not found, so string is unchanged.
- return src;
+ if (target.numBytes() == 0 || search.numBytes() == 0) {
+ return target;
}
- // Initialize byte positions
- int c = 0;
- int byteStart = 0; // position in byte
- int byteEnd = 0; // position in byte
- while (byteEnd < src.numBytes() && c < end) {
- byteEnd += UTF8String.numBytesForFirstByte(src.getByte(byteEnd));
- c += 1;
- }
+ String targetStr = target.toValidString();
+ String searchStr = search.toValidString();
+ StringSearch stringSearch = CollationFactory.getStringSearch(targetStr, searchStr, collationId);
- // At least one match was found. Estimate space needed for result.
- // The 16x multiplier here is chosen to match commons-lang3's implementation.
- int increase = Math.max(0, Math.abs(replace.numBytes() - search.numBytes())) * 16;
- final UTF8StringBuilder buf = new UTF8StringBuilder(src.numBytes() + increase);
- while (end != StringSearch.DONE) {
- buf.appendBytes(src.getBaseObject(), src.getBaseOffset() + byteStart, byteEnd - byteStart);
- buf.append(replace);
-
- // Move byteStart to the beginning of the current match
- byteStart = byteEnd;
- int cs = c;
- // Move cs to the end of the current match
- // This is necessary because the search string may contain 'multi-character' characters
- while (byteStart < src.numBytes() && cs < c + stringSearch.getMatchLength()) {
- byteStart += UTF8String.numBytesForFirstByte(src.getByte(byteStart));
- cs += 1;
- }
- // Go to next match
- end = stringSearch.next();
- // Update byte positions
- while (byteEnd < src.numBytes() && c < end) {
- byteEnd += UTF8String.numBytesForFirstByte(src.getByte(byteEnd));
- c += 1;
- }
+ StringBuilder sb = new StringBuilder();
+ int start = 0;
+ int matchStart = stringSearch.first();
+ while (matchStart != StringSearch.DONE) {
+ sb.append(targetStr, start, matchStart);
+ sb.append(replace.toValidString());
+ start = matchStart + stringSearch.getMatchLength();
+ matchStart = stringSearch.next();
}
- buf.appendBytes(src.getBaseObject(), src.getBaseOffset() + byteStart,
- src.numBytes() - byteStart);
- return buf.build();
+ sb.append(targetStr, start, targetStr.length());
+ return UTF8String.fromString(sb.toString());
}
- /*
+ /**
* Performs string replacement for UTF8_LCASE collation by searching for instances of the search
- * string in the src string, with respect to lowercased string versions, and then replacing
+ * string in the target string, with respect to lowercased string versions, and then replacing
* them with the replace string. The method returns a new UTF8String with all instances of the
* search string replaced using the replace string. Similar to UTF8String.findInSet behavior
- * used for UTF8_BINARY, the method returns the `src` string if the `search` string is empty.
+ * used for UTF8_BINARY, the method returns the `target` string if the `search` string is empty.
*
- * @param src the string to be searched in
+ * @param target the string to be searched in
* @param search the string to be searched for
* @param replace the string to be used as replacement
- * @param collationId the collation ID to use for string search
* @return the position of the first occurrence of `match` in `set`
*/
- public static UTF8String lowercaseReplace(final UTF8String src, final UTF8String search,
+ public static UTF8String lowercaseReplace(final UTF8String target, final UTF8String search,
final UTF8String replace) {
- if (src.numBytes() == 0 || search.numBytes() == 0) {
- return src;
+ if (target.numBytes() == 0 || search.numBytes() == 0) {
+ return target;
}
- // TODO(SPARK-48725): Use lowerCaseCodePoints instead of UTF8String.toLowerCase.
- UTF8String lowercaseSearch = search.toLowerCase();
+ UTF8String lowercaseSearch = lowerCaseCodePoints(search);
int start = 0;
- int end = lowercaseFind(src, lowercaseSearch, start);
+ int end = lowercaseFind(target, lowercaseSearch, start);
if (end == -1) {
// Search string was not found, so string is unchanged.
- return src;
+ return target;
}
// At least one match was found. Estimate space needed for result.
// The 16x multiplier here is chosen to match commons-lang3's implementation.
int increase = Math.max(0, replace.numBytes() - search.numBytes()) * 16;
- final UTF8StringBuilder buf = new UTF8StringBuilder(src.numBytes() + increase);
+ final UTF8StringBuilder buf = new UTF8StringBuilder(target.numBytes() + increase);
while (end != -1) {
- buf.append(src.substring(start, end));
+ buf.append(target.substring(start, end));
buf.append(replace);
// Update character positions
- start = end + lowercaseMatchLengthFrom(src, lowercaseSearch, end);
- end = lowercaseFind(src, lowercaseSearch, start);
+ start = end + lowercaseMatchLengthFrom(target, lowercaseSearch, end);
+ end = lowercaseFind(target, lowercaseSearch, start);
}
- buf.append(src.substring(start, src.numChars()));
+ buf.append(target.substring(start, target.numChars()));
return buf.build();
}
@@ -433,28 +472,16 @@ private static UTF8String toLowerCaseSlow(final UTF8String target, final int col
*/
private static void appendLowercaseCodePoint(final int codePoint, final StringBuilder sb) {
int lowercaseCodePoint = getLowercaseCodePoint(codePoint);
- if (lowercaseCodePoint == CODE_POINT_COMBINED_LOWERCASE_I_DOT) {
+ if (lowercaseCodePoint == COMBINED_ASCII_SMALL_I_COMBINING_DOT) {
// Latin capital letter I with dot above is mapped to 2 lowercase characters.
- sb.appendCodePoint(0x0069);
- sb.appendCodePoint(0x0307);
+ sb.appendCodePoint(SpecialCodePointConstants.ASCII_SMALL_I);
+ sb.appendCodePoint(SpecialCodePointConstants.COMBINING_DOT);
} else {
// All other characters should follow context-unaware ICU single-code point case mapping.
sb.appendCodePoint(lowercaseCodePoint);
}
}
- /**
- * `CODE_POINT_COMBINED_LOWERCASE_I_DOT` is an internal representation of the combined lowercase
- * code point for ASCII lowercase letter i with an additional combining dot character (U+0307).
- * This integer value is not a valid code point itself, but rather an artificial code point
- * marker used to represent the two lowercase characters that are the result of converting the
- * uppercase Turkish dotted letter I with a combining dot character (U+0130) to lowercase.
- */
- private static final int CODE_POINT_LOWERCASE_I = 0x69;
- private static final int CODE_POINT_COMBINING_DOT = 0x307;
- private static final int CODE_POINT_COMBINED_LOWERCASE_I_DOT =
- CODE_POINT_LOWERCASE_I << 16 | CODE_POINT_COMBINING_DOT;
-
/**
* Returns the lowercase version of the provided code point, with special handling for
* one-to-many case mappings (i.e. characters that map to multiple characters in lowercase) and
@@ -462,15 +489,15 @@ private static void appendLowercaseCodePoint(final int codePoint, final StringBu
* the position in the string relative to other characters in lowercase).
*/
private static int getLowercaseCodePoint(final int codePoint) {
- if (codePoint == 0x0130) {
+ if (codePoint == SpecialCodePointConstants.CAPITAL_I_WITH_DOT_ABOVE) {
// Latin capital letter I with dot above is mapped to 2 lowercase characters.
- return CODE_POINT_COMBINED_LOWERCASE_I_DOT;
+ return COMBINED_ASCII_SMALL_I_COMBINING_DOT;
}
- else if (codePoint == 0x03C2) {
+ else if (codePoint == SpecialCodePointConstants.GREEK_FINAL_SIGMA) {
// Greek final and non-final letter sigma should be mapped the same. This is achieved by
// mapping Greek small final sigma (U+03C2) to Greek small non-final sigma (U+03C3). Capital
// letter sigma (U+03A3) is mapped to small non-final sigma (U+03C3) in the `else` branch.
- return 0x03C3;
+ return SpecialCodePointConstants.GREEK_SMALL_SIGMA;
}
else {
// All other characters should follow context-unaware ICU single-code point case mapping.
@@ -522,6 +549,152 @@ public static UTF8String toTitleCase(final UTF8String target, final int collatio
BreakIterator.getWordInstance(locale)));
}
+ /**
+ * This 'HashMap' is introduced as a performance speedup. Since title-casing a codepoint can
+ * result in more than a single codepoint, for correctness, we would use
+ * 'UCharacter.toTitleCase(String)' which returns a 'String'. If we use
+ * 'UCharacter.toTitleCase(int)' (the version of the same function which converts a single
+ * codepoint to its title-case codepoint), it would be faster than the previously mentioned
+ * version, but the problem here is that we don't handle when title-casing a codepoint yields more
+ * than 1 codepoint. Since there are only 48 codepoints that are mapped to more than 1 codepoint
+ * when title-cased, they are precalculated here, so that the faster function for title-casing
+ * could be used in combination with this 'HashMap' in the method 'appendCodepointToTitleCase'.
+ */
+ private static final HashMap<Integer, String> codepointOneToManyTitleCaseLookupTable =
+ new HashMap<>(){{
+ StringBuilder sb = new StringBuilder();
+ for (int i = Character.MIN_CODE_POINT; i <= Character.MAX_CODE_POINT; ++i) {
+ sb.appendCodePoint(i);
+ String titleCase = UCharacter.toTitleCase(sb.toString(), null);
+ if (titleCase.codePointCount(0, titleCase.length()) > 1) {
+ put(i, titleCase);
+ }
+ sb.setLength(0);
+ }
+ }};
+
+ /**
+ * Title-casing a string using ICU case mappings. Iterates over the string and title-cases
+ * the first character in each word, and lowercases every other character. Handles lowercasing
+ * capital Greek letter sigma ('Σ') separately, taking into account if it should be a small final
+ * Greek sigma ('ς') or small non-final Greek sigma ('σ'). Words are separated by ASCII
+ * space(\u0020).
+ *
+ * @param source UTF8String to be title cased
+ * @return title cased source
+ */
+ public static UTF8String toTitleCaseICU(UTF8String source) {
+ // In the default UTF8String implementation, `toLowerCase` method implicitly does UTF8String
+ // validation (replacing invalid UTF-8 byte sequences with Unicode replacement character
+ // U+FFFD), but now we have to do the validation manually.
+ source = source.makeValid();
+
+ // Building the title cased source with 'sb'.
+ UTF8StringBuilder sb = new UTF8StringBuilder();
+
+ // 'isNewWord' is true if the current character is the beginning of a word, false otherwise.
+ boolean isNewWord = true;
+ // We are maintaining if the current character is preceded by a cased letter.
+ // This is used when lowercasing capital Greek letter sigma ('Σ'), to figure out if it should be
+ // lowercased into σ or ς.
+ boolean precededByCasedLetter = false;
+
+ // 'offset' is a byte offset in source's byte array pointing to the beginning of the character
+ // that we need to process next.
+ int offset = 0;
+ int len = source.numBytes();
+
+ while (offset < len) {
+ // We will actually call 'codePointFrom()' 2 times for each character in the worst case (once
+ // here, and once in 'followedByCasedLetter'). Example of a string where we call it 2 times
+ // for almost every character is 'ΣΣΣΣΣ' (a string consisting only of Greek capital sigma)
+ // and 'Σ`````' (a string consisting of a Greek capital sigma, followed by case-ignorable
+ // characters).
+ int codepoint = source.codePointFrom(offset);
+ // Appending the correctly cased character onto 'sb'.
+ appendTitleCasedCodepoint(sb, codepoint, isNewWord, precededByCasedLetter, source, offset);
+ // Updating 'isNewWord', 'precededByCasedLetter' and 'offset' to be ready for the next
+ // character that we will process.
+ isNewWord = (codepoint == SpecialCodePointConstants.ASCII_SPACE);
+ if (!UCharacter.hasBinaryProperty(codepoint, UProperty.CASE_IGNORABLE)) {
+ precededByCasedLetter = UCharacter.hasBinaryProperty(codepoint, UProperty.CASED);
+ }
+ offset += UTF8String.numBytesForFirstByte(source.getByte(offset));
+ }
+ return sb.build();
+ }
+
+ private static void appendTitleCasedCodepoint(
+ UTF8StringBuilder sb,
+ int codepoint,
+ boolean isAfterAsciiSpace,
+ boolean precededByCasedLetter,
+ UTF8String source,
+ int offset) {
+ if (isAfterAsciiSpace) {
+ // Title-casing a character if it is in the beginning of a new word.
+ appendCodepointToTitleCase(sb, codepoint);
+ return;
+ }
+ if (codepoint == SpecialCodePointConstants.GREEK_CAPITAL_SIGMA) {
+ // Handling capital Greek letter sigma ('Σ').
+ appendLowerCasedGreekCapitalSigma(sb, precededByCasedLetter, source, offset);
+ return;
+ }
+ // If it's not the beginning of a word, or a capital Greek letter sigma ('Σ'), we lowercase the
+ // character. We specially handle 'CAPITAL_I_WITH_DOT_ABOVE'.
+ if (codepoint == SpecialCodePointConstants.CAPITAL_I_WITH_DOT_ABOVE) {
+ sb.appendCodePoint(SpecialCodePointConstants.ASCII_SMALL_I);
+ sb.appendCodePoint(SpecialCodePointConstants.COMBINING_DOT);
+ return;
+ }
+ sb.appendCodePoint(UCharacter.toLowerCase(codepoint));
+ }
+
+ private static void appendLowerCasedGreekCapitalSigma(
+ UTF8StringBuilder sb,
+ boolean precededByCasedLetter,
+ UTF8String source,
+ int offset) {
+ int codepoint = (!followedByCasedLetter(source, offset) && precededByCasedLetter)
+ ? SpecialCodePointConstants.GREEK_FINAL_SIGMA
+ : SpecialCodePointConstants.GREEK_SMALL_SIGMA;
+ sb.appendCodePoint(codepoint);
+ }
+
+ /**
+ * Checks if the character beginning at 'offset' (in 'source' byte array) is followed by a cased
+ * letter.
+ */
+ private static boolean followedByCasedLetter(UTF8String source, int offset) {
+ // Moving the offset one character forward, so we could start the linear search from there.
+ offset += UTF8String.numBytesForFirstByte(source.getByte(offset));
+ int len = source.numBytes();
+
+ while (offset < len) {
+ int codepoint = source.codePointFrom(offset);
+
+ if (UCharacter.hasBinaryProperty(codepoint, UProperty.CASE_IGNORABLE)) {
+ offset += UTF8String.numBytesForFirstByte(source.getByte(offset));
+ continue;
+ }
+ return UCharacter.hasBinaryProperty(codepoint, UProperty.CASED);
+ }
+ return false;
+ }
+
+ /**
+ * Appends title-case of a single character to a 'StringBuilder' using the ICU root locale rules.
+ */
+ private static void appendCodepointToTitleCase(UTF8StringBuilder sb, int codepoint) {
+ String toTitleCase = codepointOneToManyTitleCaseLookupTable.get(codepoint);
+ if (toTitleCase == null) {
+ sb.appendCodePoint(UCharacter.toTitleCase(codepoint));
+ } else {
+ sb.append(toTitleCase);
+ }
+ }
+
/*
* Returns the position of the first occurrence of the match string in the set string,
* counting ASCII commas as delimiters. The match string is compared in a collation-aware manner,
@@ -559,6 +732,58 @@ public static int findInSet(final UTF8String match, final UTF8String set, int co
return 0;
}
+ /**
+ * Checks whether the target string contains the pattern string, with respect to the UTF8_LCASE
+ * collation. This method generally works with respect to code-point based comparison logic.
+ *
+ * @param target the string to be searched in
+ * @param pattern the string to be searched for
+ * @return whether the target string contains the pattern string
+ */
+ public static boolean lowercaseContains(final UTF8String target, final UTF8String pattern) {
+ // Fast path for ASCII-only strings.
+ if (target.isFullAscii() && pattern.isFullAscii()) {
+ return target.toLowerCase().contains(pattern.toLowerCase());
+ }
+ // Slow path for non-ASCII strings.
+ return CollationAwareUTF8String.lowercaseIndexOfSlow(target, pattern, 0) >= 0;
+ }
+
+ /**
+ * Checks whether the target string starts with the pattern string, with respect to the UTF8_LCASE
+ * collation. This method generally works with respect to code-point based comparison logic.
+ *
+ * @param target the string to be searched in
+ * @param pattern the string to be searched for
+ * @return whether the target string starts with the pattern string
+ */
+ public static boolean lowercaseStartsWith(final UTF8String target, final UTF8String pattern) {
+ // Fast path for ASCII-only strings.
+ if (target.isFullAscii() && pattern.isFullAscii()) {
+ return target.toLowerCase().startsWith(pattern.toLowerCase());
+ }
+ // Slow path for non-ASCII strings.
+ return CollationAwareUTF8String.lowercaseMatchFrom(target, lowerCaseCodePointsSlow(pattern), 0);
+ }
+
+ /**
+ * Checks whether the target string ends with the pattern string, with respect to the UTF8_LCASE
+ * collation. This method generally works with respect to code-point based comparison logic.
+ *
+ * @param target the string to be searched in
+ * @param pattern the string to be searched for
+ * @return whether the target string ends with the pattern string
+ */
+ public static boolean lowercaseEndsWith(final UTF8String target, final UTF8String pattern) {
+ // Fast path for ASCII-only strings.
+ if (target.isFullAscii() && pattern.isFullAscii()) {
+ return target.toLowerCase().endsWith(pattern.toLowerCase());
+ }
+ // Slow path for non-ASCII strings.
+ return CollationAwareUTF8String.lowercaseMatchUntil(target, lowerCaseCodePointsSlow(pattern),
+ target.numChars());
+ }
+
/**
* Returns the position of the first occurrence of the pattern string in the target string,
* starting from the specified position (0-based index referring to character position in
@@ -573,30 +798,76 @@ public static int findInSet(final UTF8String match, final UTF8String set, int co
public static int lowercaseIndexOf(final UTF8String target, final UTF8String pattern,
final int start) {
if (pattern.numChars() == 0) return target.indexOfEmpty(start);
- return lowercaseFind(target, pattern.toLowerCase(), start);
+ if (target.isFullAscii() && pattern.isFullAscii()) {
+ return target.toLowerCase().indexOf(pattern.toLowerCase(), start);
+ }
+ return lowercaseIndexOfSlow(target, pattern, start);
+ }
+
+ private static int lowercaseIndexOfSlow(final UTF8String target, final UTF8String pattern,
+ final int start) {
+ return lowercaseFind(target, lowerCaseCodePoints(pattern), start);
}
public static int indexOf(final UTF8String target, final UTF8String pattern,
final int start, final int collationId) {
if (pattern.numBytes() == 0) return target.indexOfEmpty(start);
if (target.numBytes() == 0) return MATCH_NOT_FOUND;
-
- StringSearch stringSearch = CollationFactory.getStringSearch(target, pattern, collationId);
- stringSearch.setIndex(start);
-
- return stringSearch.next();
+ // Initialize the string search with respect to the specified ICU collation.
+ String targetStr = target.toValidString();
+ String patternStr = pattern.toValidString();
+ // Check if `start` is out of bounds. The provided offset `start` is given in number of
+ // codepoints, so a simple `targetStr.length` check is not sufficient here. This check is
+ // needed because `String.offsetByCodePoints` throws an `IndexOutOfBoundsException`
+ // exception when the offset is out of bounds.
+ if (targetStr.codePointCount(0, targetStr.length()) <= start) return MATCH_NOT_FOUND;
+ StringSearch stringSearch =
+ CollationFactory.getStringSearch(targetStr, patternStr, collationId);
+ stringSearch.setOverlapping(true);
+ // Start the search from `start`-th code point (NOT necessarily from the `start`-th character).
+ int startIndex = targetStr.offsetByCodePoints(0, start);
+ stringSearch.setIndex(startIndex);
+ // Perform the search and return the next result, starting from the specified position.
+ int searchIndex = stringSearch.next();
+ if (searchIndex == StringSearch.DONE) {
+ return MATCH_NOT_FOUND;
+ }
+ // Convert the search index from character count to code point count.
+ int indexOf = targetStr.codePointCount(0, searchIndex);
+ if (indexOf < start) {
+ return MATCH_NOT_FOUND;
+ }
+ return indexOf;
}
- public static int find(UTF8String target, UTF8String pattern, int start,
- int collationId) {
- assert (pattern.numBytes() > 0);
-
- StringSearch stringSearch = CollationFactory.getStringSearch(target, pattern, collationId);
- // Set search start position (start from character at start position)
- stringSearch.setIndex(target.bytePosToChar(start));
+ private static int findIndex(final StringSearch stringSearch, int count) {
+ assert(count >= 0);
+ int index = 0;
+ while (count > 0) {
+ int nextIndex = stringSearch.next();
+ if (nextIndex == StringSearch.DONE) {
+ return MATCH_NOT_FOUND;
+ } else if (nextIndex == index && index != 0) {
+ stringSearch.setIndex(stringSearch.getIndex() + stringSearch.getMatchLength());
+ } else {
+ count--;
+ index = nextIndex;
+ }
+ }
+ return index;
+ }
- // Return either the byte position or -1 if not found
- return target.charPosToByte(stringSearch.next());
+ private static int findIndexReverse(final StringSearch stringSearch, int count) {
+ assert(count >= 0);
+ int index = 0;
+ while (count > 0) {
+ index = stringSearch.previous();
+ if (index == StringSearch.DONE) {
+ return MATCH_NOT_FOUND;
+ }
+ count--;
+ }
+ return index + stringSearch.getMatchLength();
}
public static UTF8String subStringIndex(final UTF8String string, final UTF8String delimiter,
@@ -604,63 +875,30 @@ public static UTF8String subStringIndex(final UTF8String string, final UTF8Strin
if (delimiter.numBytes() == 0 || count == 0 || string.numBytes() == 0) {
return UTF8String.EMPTY_UTF8;
}
+ String str = string.toValidString();
+ String delim = delimiter.toValidString();
+ StringSearch stringSearch = CollationFactory.getStringSearch(str, delim, collationId);
+ stringSearch.setOverlapping(true);
if (count > 0) {
- int idx = -1;
- while (count > 0) {
- idx = find(string, delimiter, idx + 1, collationId);
- if (idx >= 0) {
- count --;
- } else {
- // can not find enough delim
- return string;
- }
- }
- if (idx == 0) {
+ // If the count is positive, we search for the count-th delimiter from the left.
+ int searchIndex = findIndex(stringSearch, count);
+ if (searchIndex == MATCH_NOT_FOUND) {
+ return string;
+ } else if (searchIndex == 0) {
return UTF8String.EMPTY_UTF8;
+ } else {
+ return UTF8String.fromString(str.substring(0, searchIndex));
}
- byte[] bytes = new byte[idx];
- copyMemory(string.getBaseObject(), string.getBaseOffset(), bytes, BYTE_ARRAY_OFFSET, idx);
- return UTF8String.fromBytes(bytes);
-
} else {
- count = -count;
-
- StringSearch stringSearch = CollationFactory
- .getStringSearch(string, delimiter, collationId);
-
- int start = string.numChars() - 1;
- int lastMatchLength = 0;
- int prevStart = -1;
- while (count > 0) {
- stringSearch.reset();
- prevStart = -1;
- int matchStart = stringSearch.next();
- lastMatchLength = stringSearch.getMatchLength();
- while (matchStart <= start) {
- if (matchStart != StringSearch.DONE) {
- // Found a match, update the start position
- prevStart = matchStart;
- matchStart = stringSearch.next();
- } else {
- break;
- }
- }
-
- if (prevStart == -1) {
- // can not find enough delim
+ // If the count is negative, we search for the count-th delimiter from the right.
+ int searchIndex = findIndexReverse(stringSearch, -count);
+ if (searchIndex == MATCH_NOT_FOUND) {
return string;
- } else {
- start = prevStart - 1;
- count--;
- }
- }
-
- int resultStart = prevStart + lastMatchLength;
- if (resultStart == string.numChars()) {
- return UTF8String.EMPTY_UTF8;
+ } else if (searchIndex == str.length()) {
+ return UTF8String.EMPTY_UTF8;
+ } else {
+ return UTF8String.fromString(str.substring(searchIndex));
}
-
- return string.substring(resultStart, string.numChars());
}
}
@@ -670,7 +908,7 @@ public static UTF8String lowercaseSubStringIndex(final UTF8String string,
return UTF8String.EMPTY_UTF8;
}
- UTF8String lowercaseDelimiter = delimiter.toLowerCase();
+ UTF8String lowercaseDelimiter = lowerCaseCodePoints(delimiter);
if (count > 0) {
// Search left to right (note: the start code point is inclusive).
@@ -750,11 +988,11 @@ public static UTF8String lowercaseTranslate(final UTF8String input,
}
// Special handling for letter i (U+0069) followed by a combining dot (U+0307). By ensuring
// that `CODE_POINT_LOWERCASE_I` is buffered, we guarantee finding a max-length match.
- if (lowercaseDict.containsKey(CODE_POINT_COMBINED_LOWERCASE_I_DOT) &&
- codePoint == CODE_POINT_LOWERCASE_I && inputIter.hasNext()) {
+ if (lowercaseDict.containsKey(COMBINED_ASCII_SMALL_I_COMBINING_DOT)
+ && codePoint == SpecialCodePointConstants.ASCII_SMALL_I && inputIter.hasNext()) {
int nextCodePoint = inputIter.next();
- if (nextCodePoint == CODE_POINT_COMBINING_DOT) {
- codePoint = CODE_POINT_COMBINED_LOWERCASE_I_DOT;
+ if (nextCodePoint == SpecialCodePointConstants.COMBINING_DOT) {
+ codePoint = COMBINED_ASCII_SMALL_I_COMBINING_DOT;
} else {
codePointBuffer = nextCodePoint;
}
@@ -842,6 +1080,24 @@ public static UTF8String translate(final UTF8String input,
return UTF8String.fromString(sb.toString());
}
+ /**
+ * Trims the `srcString` string from both ends of the string using the specified `trimString`
+ * characters, with respect to the UTF8_BINARY trim collation. String trimming is performed by
+ * first trimming the left side of the string, and then trimming the right side of the string.
+ * The method returns the trimmed string. If the `trimString` is null, the method returns null.
+ *
+ * @param srcString the input string to be trimmed from both ends of the string
+ * @param trimString the trim string characters to trim
+ * @param collationId the collation ID to use for string trim
+ * @return the trimmed string (for UTF8_BINARY collation)
+ */
+ public static UTF8String binaryTrim(
+ final UTF8String srcString,
+ final UTF8String trimString,
+ final int collationId) {
+ return binaryTrimRight(srcString.trimLeft(trimString), trimString, collationId);
+ }
+
/**
* Trims the `srcString` string from both ends of the string using the specified `trimString`
* characters, with respect to the UTF8_LCASE collation. String trimming is performed by
@@ -850,12 +1106,14 @@ public static UTF8String translate(final UTF8String input,
*
* @param srcString the input string to be trimmed from both ends of the string
* @param trimString the trim string characters to trim
+ * @param collationId the collation ID to use for string trim
* @return the trimmed string (for UTF8_LCASE collation)
*/
public static UTF8String lowercaseTrim(
final UTF8String srcString,
- final UTF8String trimString) {
- return lowercaseTrimRight(lowercaseTrimLeft(srcString, trimString), trimString);
+ final UTF8String trimString,
+ final int collationId) {
+ return lowercaseTrimRight(lowercaseTrimLeft(srcString, trimString), trimString, collationId);
}
/**
@@ -883,7 +1141,8 @@ public static UTF8String trim(
* the left side, until reaching a character whose lowercased code point is not in the hash set.
* Finally, the method returns the substring from that position to the end of `srcString`.
* If `trimString` is null, null is returned. If `trimString` is empty, `srcString` is returned.
- *
+ * Note: since RTRIM is currently the only supported trim collation, trimLeft is not modified
+ * to support other trim collations; this should be done when adding TRIM and LTRIM collations.
* @param srcString the input string to be trimmed from the left end of the string
* @param trimString the trim string characters to trim
* @return the trimmed string (for UTF8_LCASE collation)
@@ -902,20 +1161,29 @@ public static UTF8String lowercaseTrimLeft(
while (trimIter.hasNext()) trimChars.add(getLowercaseCodePoint(trimIter.next()));
// Iterate over `srcString` from the left to find the first character that is not in the set.
- int searchIndex = 0, codePoint;
+ int searchIndex = 0, codePoint, codePointBuffer = -1;
Iterator<Integer> srcIter = srcString.codePointIterator();
while (srcIter.hasNext()) {
- codePoint = getLowercaseCodePoint(srcIter.next());
+ // Get the next code point from either the buffer or the iterator.
+ if (codePointBuffer != -1) {
+ codePoint = codePointBuffer;
+ codePointBuffer = -1;
+ }
+ else {
+ codePoint = getLowercaseCodePoint(srcIter.next());
+ }
// Special handling for Turkish dotted uppercase letter I.
- if (codePoint == CODE_POINT_LOWERCASE_I && srcIter.hasNext() &&
- trimChars.contains(CODE_POINT_COMBINED_LOWERCASE_I_DOT)) {
- int nextCodePoint = getLowercaseCodePoint(srcIter.next());
- if ((trimChars.contains(codePoint) && trimChars.contains(nextCodePoint))
- || nextCodePoint == CODE_POINT_COMBINING_DOT) {
+ if (codePoint == SpecialCodePointConstants.ASCII_SMALL_I && srcIter.hasNext() &&
+ trimChars.contains(COMBINED_ASCII_SMALL_I_COMBINING_DOT)) {
+ codePointBuffer = codePoint;
+ codePoint = getLowercaseCodePoint(srcIter.next());
+ if (codePoint == SpecialCodePointConstants.COMBINING_DOT) {
searchIndex += 2;
- }
- else {
- if (trimChars.contains(codePoint)) ++searchIndex;
+ codePointBuffer = -1;
+ } else if (trimChars.contains(codePointBuffer)) {
+ ++searchIndex;
+ codePointBuffer = codePoint;
+ } else {
break;
}
} else if (trimChars.contains(codePoint)) {
@@ -937,7 +1205,9 @@ public static UTF8String lowercaseTrimLeft(
* character in `trimString`, until reaching a character that is not found in `trimString`.
* Finally, the method returns the substring from that position to the end of `srcString`.
* If `trimString` is null, null is returned. If `trimString` is empty, `srcString` is returned.
- *
+ * Note: since RTRIM is currently the only supported trim collation, trimLeft is not modified
+ * to support other trim collations; this should be done when adding TRIM and LTRIM
+ * collations.
* @param srcString the input string to be trimmed from the left end of the string
* @param trimString the trim string characters to trim
* @param collationId the collation ID to use for string trimming
@@ -957,7 +1227,7 @@ public static UTF8String trimLeft(
CodePointIteratorType.CODE_POINT_ITERATOR_MAKE_VALID);
while (trimIter.hasNext()) {
int codePoint = trimIter.next();
- trimChars.putIfAbsent(codePoint, String.valueOf((char) codePoint));
+ trimChars.putIfAbsent(codePoint, new String(Character.toChars(codePoint)));
}
// Iterate over srcString from the left and find the first character that is not in trimChars.
@@ -985,22 +1255,103 @@ public static UTF8String trimLeft(
// Return the substring from the calculated position until the end of the string.
return UTF8String.fromString(src.substring(charIndex));
}
+ /**
+ * Trims the `srcString` string from the right side using the specified `trimString` characters,
+ * with respect to the UTF8_BINARY trim collation. For UTF8_BINARY trim collation, the method has
+ * one special case to cover, compared to the trimRight function for the regular UTF8_BINARY collation.
+ * Trailing spaces should be ignored in case of trim collation (rtrim for example) and if
+ * trimString does not contain spaces. In this case, the method trims the string from the right
+ * and after call of regular trim functions returns back trimmed spaces as those should not get
+ * removed.
+ * @param srcString the input string to be trimmed from the right end of the string
+ * @param trimString the trim string characters to trim
+ * @param collationId the collation ID to use for string trim
+ * @return the trimmed string (for UTF_BINARY collation)
+ */
+ public static UTF8String binaryTrimRight(
+ final UTF8String srcString,
+ final UTF8String trimString,
+ final int collationId) {
+ // Matching the default UTF8String behavior for null `trimString`.
+ if (trimString == null) {
+ return null;
+ }
+
+ // Create a hash set of code points for all characters of `trimString`.
+ HashSet trimChars = new HashSet<>();
+ Iterator trimIter = trimString.codePointIterator();
+ while (trimIter.hasNext()) trimChars.add(trimIter.next());
+
+ // Iterate over `srcString` from the right to find the first character that is not in the set.
+ int searchIndex = srcString.numChars(), codePoint, codePointBuffer = -1;
+
+ // In cases of trim collation (rtrim for example) trailing spaces should be ignored.
+ // If trimString contains spaces this behaviour is not important as they would get trimmed
+ // anyway. However, if it is not the case they should be ignored and then appended after
+ // trimming other characters.
+ int lastNonSpaceByteIdx = srcString.numBytes(), lastNonSpaceCharacterIdx = srcString.numChars();
+ if (!trimChars.contains(SpecialCodePointConstants.ASCII_SPACE) &&
+ CollationFactory.ignoresSpacesInTrimFunctions(
+ collationId, /*isLTrim=*/ false, /*isRTrim=*/true)) {
+ while (lastNonSpaceByteIdx > 0 &&
+ srcString.getByte(lastNonSpaceByteIdx - 1) == ' ') {
+ --lastNonSpaceByteIdx;
+ }
+ // If the src string contains only spaces there is no need to do any trimming, since it's
+ // already checked that the trim string does not contain any spaces.
+ if (lastNonSpaceByteIdx == 0) {
+ return srcString;
+ }
+ searchIndex = lastNonSpaceCharacterIdx =
+ srcString.numChars() - (srcString.numBytes() - lastNonSpaceByteIdx);
+ }
+ Iterator srcIter = srcString.reverseCodePointIterator();
+ for (int i = lastNonSpaceCharacterIdx; i < srcString.numChars(); i++) {
+ srcIter.next();
+ }
+
+ while (srcIter.hasNext()) {
+ codePoint = srcIter.next();
+ if (trimChars.contains(codePoint)) {
+ --searchIndex;
+ }
+ else {
+ break;
+ }
+ }
+
+ // Return the substring from the start of the string to the calculated position and append
+ // trailing spaces if they were ignored
+ if (searchIndex == srcString.numChars()) {
+ return srcString;
+ }
+ if (lastNonSpaceCharacterIdx == srcString.numChars()) {
+ return srcString.substring(0, searchIndex);
+ }
+ return UTF8String.concat(
+ srcString.substring(0, searchIndex),
+ srcString.substring(lastNonSpaceCharacterIdx, srcString.numChars()));
+ }
/**
* Trims the `srcString` string from the right side using the specified `trimString` characters,
* with respect to the UTF8_LCASE collation. For UTF8_LCASE, the method first creates a hash
* set of lowercased code points in `trimString`, and then iterates over the `srcString` from
* the right side, until reaching a character whose lowercased code point is not in the hash set.
+ * In case of UTF8_LCASE trim collation and when trimString does not contain spaces, trailing
+ * spaces should be ignored. However, after trimming function call they should be appended back.
* Finally, the method returns the substring from the start of `srcString` until that position.
* If `trimString` is null, null is returned. If `trimString` is empty, `srcString` is returned.
*
* @param srcString the input string to be trimmed from the right end of the string
* @param trimString the trim string characters to trim
+ * @param collationId the collation ID to use for string trim
* @return the trimmed string (for UTF8_LCASE collation)
*/
public static UTF8String lowercaseTrimRight(
final UTF8String srcString,
- final UTF8String trimString) {
+ final UTF8String trimString,
+ final int collationId) {
// Matching the default UTF8String behavior for null `trimString`.
if (trimString == null) {
return null;
@@ -1012,20 +1363,53 @@ public static UTF8String lowercaseTrimRight(
while (trimIter.hasNext()) trimChars.add(getLowercaseCodePoint(trimIter.next()));
// Iterate over `srcString` from the right to find the first character that is not in the set.
- int searchIndex = srcString.numChars(), codePoint;
+ int searchIndex = srcString.numChars(), codePoint, codePointBuffer = -1;
+
+ // In cases of trim collation (rtrim for example) trailing spaces should be ignored.
+ // If trimString contains spaces this behaviour is not important as they would get trimmed
+ // anyway. However, if it is not the case they should be ignored and then appended after
+ // trimming other characters.
+ int lastNonSpaceByteIdx = srcString.numBytes(), lastNonSpaceCharacterIdx = srcString.numChars();
+ if (!trimChars.contains(SpecialCodePointConstants.ASCII_SPACE) &&
+ CollationFactory.ignoresSpacesInTrimFunctions(
+ collationId, /*isLTrim=*/ false, /*isRTrim=*/true)) {
+ while (lastNonSpaceByteIdx > 0 &&
+ srcString.getByte(lastNonSpaceByteIdx - 1) == ' ') {
+ --lastNonSpaceByteIdx;
+ }
+ // If the src string contains only spaces there is no need to do any trimming, since it's
+ // already checked that the trim string does not contain any spaces.
+ if (lastNonSpaceByteIdx == 0) {
+ return srcString;
+ }
+ searchIndex = lastNonSpaceCharacterIdx =
+ srcString.numChars() - (srcString.numBytes() - lastNonSpaceByteIdx);
+ }
Iterator srcIter = srcString.reverseCodePointIterator();
+ for (int i = lastNonSpaceCharacterIdx; i < srcString.numChars(); i++) {
+ srcIter.next();
+ }
+
while (srcIter.hasNext()) {
- codePoint = getLowercaseCodePoint(srcIter.next());
+ if (codePointBuffer != -1) {
+ codePoint = codePointBuffer;
+ codePointBuffer = -1;
+ }
+ else {
+ codePoint = getLowercaseCodePoint(srcIter.next());
+ }
// Special handling for Turkish dotted uppercase letter I.
- if (codePoint == CODE_POINT_COMBINING_DOT && srcIter.hasNext() &&
- trimChars.contains(CODE_POINT_COMBINED_LOWERCASE_I_DOT)) {
- int nextCodePoint = getLowercaseCodePoint(srcIter.next());
- if ((trimChars.contains(codePoint) && trimChars.contains(nextCodePoint))
- || nextCodePoint == CODE_POINT_LOWERCASE_I) {
+ if (codePoint == SpecialCodePointConstants.COMBINING_DOT && srcIter.hasNext() &&
+ trimChars.contains(COMBINED_ASCII_SMALL_I_COMBINING_DOT)) {
+ codePointBuffer = codePoint;
+ codePoint = getLowercaseCodePoint(srcIter.next());
+ if (codePoint == SpecialCodePointConstants.ASCII_SMALL_I) {
searchIndex -= 2;
- }
- else {
- if (trimChars.contains(codePoint)) --searchIndex;
+ codePointBuffer = -1;
+ } else if (trimChars.contains(codePointBuffer)) {
+ --searchIndex;
+ codePointBuffer = codePoint;
+ } else {
break;
}
} else if (trimChars.contains(codePoint)) {
@@ -1036,8 +1420,17 @@ public static UTF8String lowercaseTrimRight(
}
}
- // Return the substring from the start of the string to the calculated position.
- return searchIndex == srcString.numChars() ? srcString : srcString.substring(0, searchIndex);
+ // Return the substring from the start of the string to the calculated position and append
+ // trailing spaces if they were ignored
+ if (searchIndex == srcString.numChars()) {
+ return srcString;
+ }
+ if (lastNonSpaceCharacterIdx == srcString.numChars()) {
+ return srcString.substring(0, searchIndex);
+ }
+ return UTF8String.concat(
+ srcString.substring(0, searchIndex),
+ srcString.substring(lastNonSpaceCharacterIdx, srcString.numChars()));
}
/**
@@ -1067,14 +1460,33 @@ public static UTF8String trimRight(
CodePointIteratorType.CODE_POINT_ITERATOR_MAKE_VALID);
while (trimIter.hasNext()) {
int codePoint = trimIter.next();
- trimChars.putIfAbsent(codePoint, String.valueOf((char) codePoint));
+ trimChars.putIfAbsent(codePoint, new String(Character.toChars(codePoint)));
}
// Iterate over srcString from the left and find the first character that is not in trimChars.
String src = srcString.toValidString();
CharacterIterator target = new StringCharacterIterator(src);
Collator collator = CollationFactory.fetchCollation(collationId).collator;
- int charIndex = src.length(), longestMatchLen;
+ int charIndex = src.length(), longestMatchLen, lastNonSpacePosition = src.length();
+
+ // In cases of trim collation (rtrim for example) trailing spaces should be ignored.
+ // If trimString contains spaces this behaviour is not important as they would get trimmed
+ // anyway. However, if it is not the case they should be ignored and then appended after
+ // trimming other characters.
+ if (!trimChars.containsKey(SpecialCodePointConstants.ASCII_SPACE) &&
+ CollationFactory.ignoresSpacesInTrimFunctions(
+ collationId, /*isLTrim=*/ false, /*isRTrim=*/true)) {
+ while (lastNonSpacePosition > 0 && src.charAt(lastNonSpacePosition - 1) == ' ') {
+ --lastNonSpacePosition;
+ }
+ // If the src string contains only spaces there is no need to do any trimming, since it's
+ // already checked that the trim string does not contain any spaces.
+ if (lastNonSpacePosition == 0) {
+ return UTF8String.fromString(src);
+ }
+ charIndex = lastNonSpacePosition;
+ }
+
while (charIndex >= 0) {
longestMatchLen = 0;
for (String trim : trimChars.values()) {
@@ -1102,8 +1514,91 @@ public static UTF8String trimRight(
else charIndex -= longestMatchLen;
}
- // Return the substring from the start of the string until that position.
- return UTF8String.fromString(src.substring(0, charIndex));
+ // Return the substring from the start of the string until that position and append
+ // trailing spaces if they were ignored
+ if (charIndex == src.length()) {
+ return srcString;
+ }
+ if (lastNonSpacePosition == srcString.numChars()) {
+ return UTF8String.fromString(src.substring(0, charIndex));
+ }
+ return UTF8String.fromString(
+ src.substring(0, charIndex) +
+ src.substring(lastNonSpacePosition)
+ );
+ }
+
+ public static UTF8String[] splitSQL(final UTF8String input, final UTF8String delim,
+ final int limit, final int collationId) {
+ if (CollationFactory.fetchCollation(collationId).isUtf8BinaryType) {
+ return input.split(delim, limit);
+ } else if (CollationFactory.fetchCollation(collationId).isUtf8LcaseType) {
+ return lowercaseSplitSQL(input, delim, limit);
+ } else {
+ return icuSplitSQL(input, delim, limit, collationId);
+ }
+ }
+
+ public static UTF8String[] lowercaseSplitSQL(final UTF8String string, final UTF8String delimiter,
+ final int limit) {
+ if (delimiter.numBytes() == 0) return new UTF8String[] { string };
+ if (string.numBytes() == 0) return new UTF8String[] { UTF8String.EMPTY_UTF8 };
+
+ List strings = new ArrayList<>();
+ UTF8String lowercaseDelimiter = lowerCaseCodePoints(delimiter);
+ int startIndex = 0, nextMatch = 0, nextMatchLength;
+ while (nextMatch != MATCH_NOT_FOUND) {
+ if (limit > 0 && strings.size() == limit - 1) {
+ break;
+ }
+ nextMatch = lowercaseFind(string, lowercaseDelimiter, startIndex);
+ if (nextMatch != MATCH_NOT_FOUND) {
+ nextMatchLength = lowercaseMatchLengthFrom(string, lowercaseDelimiter, nextMatch);
+ strings.add(string.substring(startIndex, nextMatch));
+ startIndex = nextMatch + nextMatchLength;
+ }
+ }
+ if (startIndex <= string.numChars()) {
+ strings.add(string.substring(startIndex, string.numChars()));
+ }
+ if (limit == 0) {
+ // Remove trailing empty strings
+ int i = strings.size() - 1;
+ while (i >= 0 && strings.get(i).numBytes() == 0) {
+ strings.remove(i);
+ i--;
+ }
+ }
+ return strings.toArray(new UTF8String[0]);
+ }
+
+ public static UTF8String[] icuSplitSQL(final UTF8String string, final UTF8String delimiter,
+ final int limit, final int collationId) {
+ if (delimiter.numBytes() == 0) return new UTF8String[] { string };
+ if (string.numBytes() == 0) return new UTF8String[] { UTF8String.EMPTY_UTF8 };
+ List strings = new ArrayList<>();
+ String target = string.toValidString(), pattern = delimiter.toValidString();
+ StringSearch stringSearch = CollationFactory.getStringSearch(target, pattern, collationId);
+ int start = 0, end;
+ while ((end = stringSearch.next()) != StringSearch.DONE) {
+ if (limit > 0 && strings.size() == limit - 1) {
+ break;
+ }
+ strings.add(UTF8String.fromString(target.substring(start, end)));
+ start = end + stringSearch.getMatchLength();
+ }
+ if (start <= target.length()) {
+ strings.add(UTF8String.fromString(target.substring(start)));
+ }
+ if (limit == 0) {
+ // Remove trailing empty strings
+ int i = strings.size() - 1;
+ while (i >= 0 && strings.get(i).numBytes() == 0) {
+ strings.remove(i);
+ i--;
+ }
+ }
+ return strings.toArray(new UTF8String[0]);
}
// TODO: Add more collation-aware UTF8String operations here.
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
index f13f66e384e0f..4064f830e92d8 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java
@@ -23,12 +23,14 @@
import java.util.function.Function;
import java.util.function.BiFunction;
import java.util.function.ToLongFunction;
+import java.util.stream.Stream;
+import com.ibm.icu.text.CollationKey;
+import com.ibm.icu.text.Collator;
import com.ibm.icu.text.RuleBasedCollator;
import com.ibm.icu.text.StringSearch;
import com.ibm.icu.util.ULocale;
-import com.ibm.icu.text.CollationKey;
-import com.ibm.icu.text.Collator;
+import com.ibm.icu.util.VersionInfo;
import org.apache.spark.SparkException;
import org.apache.spark.unsafe.types.UTF8String;
@@ -88,6 +90,18 @@ public Optional getVersion() {
}
}
+ public record CollationMeta(
+ String catalog,
+ String schema,
+ String collationName,
+ String language,
+ String country,
+ String icuVersion,
+ String padAttribute,
+ boolean accentSensitivity,
+ boolean caseSensitivity,
+ String spaceTrimming) { }
+
/**
* Entry encapsulating all information about a collation.
*/
@@ -99,7 +113,8 @@ public static class Collation {
/**
* Version of the collation. This is the version of the ICU library Collator.
- * For non-ICU collations (e.g. UTF8 Binary) the version is set to "1.0".
+ * For UTF8 Binary the version is set to "1.0". For ICU collations and UTF8_LCASE
+ * (because it uses ICU mappings) the version is set to the version of the ICU library.
* When using ICU Collator this version is exposed through collator.getVersion().
* Whenever the collation is updated, the version should be updated as well or kept
* for backwards compatibility.
@@ -133,13 +148,31 @@ public static class Collation {
public final boolean supportsBinaryOrdering;
/**
- * Support for Lowercase Equality implies that it is possible to check equality on
- * byte by byte level, but only after calling "UTF8String.toLowerCase" on both arguments.
+ * Support for Lowercase Equality implies that it is possible to check equality on byte by
+ * byte level, but only after calling "UTF8String.lowerCaseCodePoints" on both arguments.
* This allows custom collation support for UTF8_LCASE collation in various Spark
* expressions, as this particular collation is not supported by the external ICU library.
*/
public final boolean supportsLowercaseEquality;
+ /**
+ * Support for Space Trimming implies that, based on the specifier (for now only right trim),
+ * leading, trailing or both spaces are removed from the input string before comparison.
+ */
+ public final boolean supportsSpaceTrimming;
+
+ /**
+ * Is Utf8 binary type as indicator if collation base type is UTF8 binary. Note currently only
+ * collations Utf8_Binary and Utf8_Binary_RTRIM are considered as Utf8 binary type.
+ */
+ public final boolean isUtf8BinaryType;
+
+ /**
+ * Is Utf8 lcase type as indicator if collation base type is UTF8 lcase. Note currently only
+ * collations Utf8_Lcase and Utf8_Lcase_RTRIM are considered as Utf8 Lcase type.
+ */
+ public final boolean isUtf8LcaseType;
+
public Collation(
String collationName,
String provider,
@@ -147,31 +180,27 @@ public Collation(
Comparator comparator,
String version,
ToLongFunction hashFunction,
- boolean supportsBinaryEquality,
- boolean supportsBinaryOrdering,
- boolean supportsLowercaseEquality) {
+ BiFunction equalsFunction,
+ boolean isUtf8BinaryType,
+ boolean isUtf8LcaseType,
+ boolean supportsSpaceTrimming) {
this.collationName = collationName;
this.provider = provider;
this.collator = collator;
this.comparator = comparator;
this.version = version;
this.hashFunction = hashFunction;
- this.supportsBinaryEquality = supportsBinaryEquality;
- this.supportsBinaryOrdering = supportsBinaryOrdering;
- this.supportsLowercaseEquality = supportsLowercaseEquality;
-
- // De Morgan's Law to check supportsBinaryOrdering => supportsBinaryEquality
- assert(!supportsBinaryOrdering || supportsBinaryEquality);
+ this.isUtf8BinaryType = isUtf8BinaryType;
+ this.isUtf8LcaseType = isUtf8LcaseType;
+ this.equalsFunction = equalsFunction;
+ this.supportsSpaceTrimming = supportsSpaceTrimming;
+ this.supportsBinaryEquality = !supportsSpaceTrimming && isUtf8BinaryType;
+ this.supportsBinaryOrdering = !supportsSpaceTrimming && isUtf8BinaryType;
+ this.supportsLowercaseEquality = !supportsSpaceTrimming && isUtf8LcaseType;
// No Collation can simultaneously support binary equality and lowercase equality
assert(!supportsBinaryEquality || !supportsLowercaseEquality);
assert(SUPPORTED_PROVIDERS.contains(provider));
-
- if (supportsBinaryEquality) {
- this.equalsFunction = UTF8String::equals;
- } else {
- this.equalsFunction = (s1, s2) -> this.comparator.compare(s1, s2) == 0;
- }
}
/**
@@ -186,7 +215,8 @@ public Collation(
* bit 29: 0 for UTF8_BINARY, 1 for ICU collations.
* bit 28-24: Reserved.
* bit 23-22: Reserved for version.
- * bit 21-18: Reserved for space trimming.
+ * bit 21-19: Zeroes, reserved for future trimmings.
+ * bit 18: 0 = none, 1 = right trim.
* bit 17-0: Depend on collation family.
* ---
* INDETERMINATE collation ID binary layout:
@@ -201,7 +231,8 @@ public Collation(
* UTF8_BINARY collation ID binary layout:
* bit 31-24: Zeroes.
* bit 23-22: Zeroes, reserved for version.
- * bit 21-18: Zeroes, reserved for space trimming.
+ * bit 21-19: Zeroes, reserved for future trimmings.
+ * bit 18: 0 = none, 1 = right trim.
* bit 17-3: Zeroes.
* bit 2: 0, reserved for accent sensitivity.
* bit 1: 0, reserved for uppercase and case-insensitive.
@@ -212,7 +243,8 @@ public Collation(
* bit 29: 1
* bit 28-24: Zeroes.
* bit 23-22: Zeroes, reserved for version.
- * bit 21-18: Zeroes, reserved for space trimming.
+ * bit 21-18: Reserved for space trimming.
+ * 0000 = none, 0001 = right trim. Bits 21-19 remain reserved and fixed to 0.
* bit 17: 0 = case-sensitive, 1 = case-insensitive.
* bit 16: 0 = accent-sensitive, 1 = accent-insensitive.
* bit 15-14: Zeroes, reserved for punctuation sensitivity.
@@ -220,14 +252,20 @@ public Collation(
* bit 11-0: Locale ID as specified in `ICULocaleToId` mapping.
* ---
* Some illustrative examples of collation name to ID mapping:
- * - UTF8_BINARY -> 0
- * - UTF8_LCASE -> 1
- * - UNICODE -> 0x20000000
- * - UNICODE_AI -> 0x20010000
- * - UNICODE_CI -> 0x20020000
- * - UNICODE_CI_AI -> 0x20030000
- * - af -> 0x20000001
- * - af_CI_AI -> 0x20030001
+ * - UTF8_BINARY -> 0
+ * - UTF8_BINARY_RTRIM -> 0x00040000
+ * - UTF8_LCASE -> 1
+ * - UTF8_LCASE_RTRIM -> 0x00040001
+ * - UNICODE -> 0x20000000
+ * - UNICODE_AI -> 0x20010000
+ * - UNICODE_CI -> 0x20020000
+ * - UNICODE_RTRIM -> 0x20040000
+ * - UNICODE_CI_AI -> 0x20030000
+ * - UNICODE_CI_RTRIM -> 0x20060000
+ * - UNICODE_AI_RTRIM -> 0x20050000
+ * - UNICODE_CI_AI_RTRIM-> 0x20070000
+ * - af -> 0x20000001
+ * - af_CI_AI -> 0x20030001
*/
private abstract static class CollationSpec {
@@ -246,6 +284,14 @@ protected enum ImplementationProvider {
UTF8_BINARY, ICU
}
+ /**
+ * Bit 18 in collation ID having value 0 for none and 1 for right trimming.
+ * Bits 21, 20, 19 remained reserved (and fixed to 0) for future use.
+ */
+ protected enum SpaceTrimming {
+ NONE, RTRIM
+ }
+
/**
* Offset in binary collation ID layout.
*/
@@ -266,6 +312,17 @@ protected enum ImplementationProvider {
*/
protected static final int IMPLEMENTATION_PROVIDER_MASK = 0b1;
+
+ /**
+ * Offset in binary collation ID layout.
+ */
+ protected static final int SPACE_TRIMMING_OFFSET = 18;
+
+ /**
+ * Bitmask corresponding to width in bits in binary collation ID layout.
+ */
+ protected static final int SPACE_TRIMMING_MASK = 0b1;
+
private static final int INDETERMINATE_COLLATION_ID = -1;
/**
@@ -290,6 +347,45 @@ private static DefinitionOrigin getDefinitionOrigin(int collationId) {
DEFINITION_ORIGIN_OFFSET, DEFINITION_ORIGIN_MASK)];
}
+ /**
+ * Utility function to retrieve `SpaceTrimming` enum instance from collation ID.
+ */
+ protected static SpaceTrimming getSpaceTrimming(int collationId) {
+ return SpaceTrimming.values()[SpecifierUtils.getSpecValue(collationId,
+ SPACE_TRIMMING_OFFSET, SPACE_TRIMMING_MASK)];
+ }
+
+ protected static UTF8String applyTrimmingPolicy(UTF8String s, int collationId) {
+ return applyTrimmingPolicy(s, getSpaceTrimming(collationId));
+ }
+
+ /**
+ * Returns if leading/trailing spaces should be ignored in trim string expressions. This is
+ * needed because space trimming collation directly changes behaviour of trim functions.
+ */
+ protected static boolean ignoresSpacesInTrimFunctions(
+ int collationId,
+ boolean isLTrim,
+ boolean isRTrim) {
+ if (isRTrim && getSpaceTrimming(collationId) == SpaceTrimming.RTRIM) {
+ return true;
+ }
+
+ // In case of adding new trimming collations in the future (LTRIM and TRIM) here logic
+ // should be added.
+ return false;
+ }
+
+ /**
+ * Utility function to trim spaces when collation uses space trimming.
+ */
+ protected static UTF8String applyTrimmingPolicy(UTF8String s, SpaceTrimming spaceTrimming) {
+ if(spaceTrimming == SpaceTrimming.RTRIM){
+ return s.trimRight();
+ }
+ return s; // No trimming.
+ }
+
/**
* Main entry point for retrieving `Collation` instance from collation ID.
*/
@@ -342,6 +438,25 @@ private static int collationNameToId(String collationName) throws SparkException
}
protected abstract Collation buildCollation();
+
+ protected abstract CollationMeta buildCollationMeta();
+
+ protected abstract String normalizedCollationName();
+
+ static List listCollations() {
+ return Stream.concat(
+ CollationSpecUTF8.listCollations().stream(),
+ CollationSpecICU.listCollations().stream()).toList();
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier collationIdentifier) {
+ CollationMeta collationSpecUTF8 =
+ CollationSpecUTF8.loadCollationMeta(collationIdentifier);
+ if (collationSpecUTF8 == null) {
+ return CollationSpecICU.loadCollationMeta(collationIdentifier);
+ }
+ return collationSpecUTF8;
+ }
}
private static class CollationSpecUTF8 extends CollationSpec {
@@ -364,68 +479,227 @@ private enum CaseSensitivity {
*/
private static final int CASE_SENSITIVITY_MASK = 0b1;
+ private static final String UTF8_BINARY_COLLATION_NAME = "UTF8_BINARY";
+ private static final String UTF8_LCASE_COLLATION_NAME = "UTF8_LCASE";
+
private static final int UTF8_BINARY_COLLATION_ID =
- new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED).collationId;
+ new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED, SpaceTrimming.NONE).collationId;
private static final int UTF8_LCASE_COLLATION_ID =
- new CollationSpecUTF8(CaseSensitivity.LCASE).collationId;
+ new CollationSpecUTF8(CaseSensitivity.LCASE, SpaceTrimming.NONE).collationId;
protected static Collation UTF8_BINARY_COLLATION =
- new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED).buildCollation();
+ new CollationSpecUTF8(CaseSensitivity.UNSPECIFIED, SpaceTrimming.NONE).buildCollation();
protected static Collation UTF8_LCASE_COLLATION =
- new CollationSpecUTF8(CaseSensitivity.LCASE).buildCollation();
+ new CollationSpecUTF8(CaseSensitivity.LCASE, SpaceTrimming.NONE).buildCollation();
+ private final CaseSensitivity caseSensitivity;
+ private final SpaceTrimming spaceTrimming;
private final int collationId;
- private CollationSpecUTF8(CaseSensitivity caseSensitivity) {
- this.collationId =
+ private CollationSpecUTF8(
+ CaseSensitivity caseSensitivity,
+ SpaceTrimming spaceTrimming) {
+ this.caseSensitivity = caseSensitivity;
+ this.spaceTrimming = spaceTrimming;
+
+ int collationId =
SpecifierUtils.setSpecValue(0, CASE_SENSITIVITY_OFFSET, caseSensitivity);
+ this.collationId =
+ SpecifierUtils.setSpecValue(collationId, SPACE_TRIMMING_OFFSET, spaceTrimming);
}
private static int collationNameToId(String originalName, String collationName)
throws SparkException {
- if (UTF8_BINARY_COLLATION.collationName.equals(collationName)) {
- return UTF8_BINARY_COLLATION_ID;
- } else if (UTF8_LCASE_COLLATION.collationName.equals(collationName)) {
- return UTF8_LCASE_COLLATION_ID;
+
+ int baseId;
+ String collationNamePrefix;
+
+ if (collationName.startsWith(UTF8_BINARY_COLLATION.collationName)) {
+ baseId = UTF8_BINARY_COLLATION_ID;
+ collationNamePrefix = UTF8_BINARY_COLLATION.collationName;
+ } else if (collationName.startsWith(UTF8_LCASE_COLLATION.collationName)) {
+ baseId = UTF8_LCASE_COLLATION_ID;
+ collationNamePrefix = UTF8_LCASE_COLLATION.collationName;
} else {
// Throw exception with original (before case conversion) collation name.
throw collationInvalidNameException(originalName);
}
+
+ String remainingSpecifiers = collationName.substring(collationNamePrefix.length());
+ if(remainingSpecifiers.isEmpty()) {
+ return baseId;
+ }
+ if(!remainingSpecifiers.startsWith("_")){
+ throw collationInvalidNameException(originalName);
+ }
+
+ SpaceTrimming spaceTrimming = SpaceTrimming.NONE;
+ String remainingSpec = remainingSpecifiers.substring(1);
+ if (remainingSpec.equals("RTRIM")) {
+ spaceTrimming = SpaceTrimming.RTRIM;
+ } else {
+ throw collationInvalidNameException(originalName);
+ }
+
+ return SpecifierUtils.setSpecValue(baseId, SPACE_TRIMMING_OFFSET, spaceTrimming);
}
private static CollationSpecUTF8 fromCollationId(int collationId) {
// Extract case sensitivity from collation ID.
int caseConversionOrdinal = SpecifierUtils.getSpecValue(collationId,
CASE_SENSITIVITY_OFFSET, CASE_SENSITIVITY_MASK);
- // Verify only case sensitivity bits were set settable in UTF8_BINARY family of collations.
- assert (SpecifierUtils.removeSpec(collationId,
- CASE_SENSITIVITY_OFFSET, CASE_SENSITIVITY_MASK) == 0);
- return new CollationSpecUTF8(CaseSensitivity.values()[caseConversionOrdinal]);
+ // Extract space trimming from collation ID.
+ int spaceTrimmingOrdinal = getSpaceTrimming(collationId).ordinal();
+ assert(isValidCollationId(collationId));
+ return new CollationSpecUTF8(
+ CaseSensitivity.values()[caseConversionOrdinal],
+ SpaceTrimming.values()[spaceTrimmingOrdinal]);
+ }
+
+ private static boolean isValidCollationId(int collationId) {
+ collationId = SpecifierUtils.removeSpec(
+ collationId,
+ SPACE_TRIMMING_OFFSET,
+ SPACE_TRIMMING_MASK);
+ collationId = SpecifierUtils.removeSpec(
+ collationId,
+ CASE_SENSITIVITY_OFFSET,
+ CASE_SENSITIVITY_MASK);
+ return collationId == 0;
}
@Override
protected Collation buildCollation() {
- if (collationId == UTF8_BINARY_COLLATION_ID) {
+ if (caseSensitivity == CaseSensitivity.UNSPECIFIED) {
+ Comparator comparator;
+ ToLongFunction hashFunction;
+ BiFunction equalsFunction;
+ boolean supportsSpaceTrimming = spaceTrimming != SpaceTrimming.NONE;
+
+ if (spaceTrimming == SpaceTrimming.NONE) {
+ comparator = UTF8String::binaryCompare;
+ hashFunction = s -> (long) s.hashCode();
+ equalsFunction = UTF8String::equals;
+ } else {
+ comparator = (s1, s2) -> applyTrimmingPolicy(s1, spaceTrimming).binaryCompare(
+ applyTrimmingPolicy(s2, spaceTrimming));
+ hashFunction = s -> (long) applyTrimmingPolicy(s, spaceTrimming).hashCode();
+ equalsFunction = (s1, s2) -> applyTrimmingPolicy(s1, spaceTrimming).equals(
+ applyTrimmingPolicy(s2, spaceTrimming));
+ }
+
return new Collation(
- "UTF8_BINARY",
+ normalizedCollationName(),
PROVIDER_SPARK,
null,
- UTF8String::binaryCompare,
- "1.0",
- s -> (long) s.hashCode(),
- /* supportsBinaryEquality = */ true,
- /* supportsBinaryOrdering = */ true,
- /* supportsLowercaseEquality = */ false);
+ comparator,
+ CollationSpecICU.ICU_VERSION,
+ hashFunction,
+ equalsFunction,
+ /* isUtf8BinaryType = */ true,
+ /* isUtf8LcaseType = */ false,
+ spaceTrimming != SpaceTrimming.NONE);
} else {
+ Comparator comparator;
+ ToLongFunction hashFunction;
+
+ if (spaceTrimming == SpaceTrimming.NONE) {
+ comparator = CollationAwareUTF8String::compareLowerCase;
+ hashFunction = s ->
+ (long) CollationAwareUTF8String.lowerCaseCodePoints(s).hashCode();
+ } else {
+ comparator = (s1, s2) -> CollationAwareUTF8String.compareLowerCase(
+ applyTrimmingPolicy(s1, spaceTrimming),
+ applyTrimmingPolicy(s2, spaceTrimming));
+ hashFunction = s -> (long) CollationAwareUTF8String.lowerCaseCodePoints(
+ applyTrimmingPolicy(s, spaceTrimming)).hashCode();
+ }
+
return new Collation(
- "UTF8_LCASE",
+ normalizedCollationName(),
PROVIDER_SPARK,
null,
- CollationAwareUTF8String::compareLowerCase,
- "1.0",
- s -> (long) CollationAwareUTF8String.lowerCaseCodePoints(s).hashCode(),
- /* supportsBinaryEquality = */ false,
- /* supportsBinaryOrdering = */ false,
- /* supportsLowercaseEquality = */ true);
+ comparator,
+ CollationSpecICU.ICU_VERSION,
+ hashFunction,
+ (s1, s2) -> comparator.compare(s1, s2) == 0,
+ /* isUtf8BinaryType = */ false,
+ /* isUtf8LcaseType = */ true,
+ spaceTrimming != SpaceTrimming.NONE);
+ }
+ }
+
+ @Override
+ protected CollationMeta buildCollationMeta() {
+ if (caseSensitivity == CaseSensitivity.UNSPECIFIED) {
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ normalizedCollationName(),
+ /* language = */ null,
+ /* country = */ null,
+ /* icuVersion = */ null,
+ COLLATION_PAD_ATTRIBUTE,
+ /* accentSensitivity = */ true,
+ /* caseSensitivity = */ true,
+ spaceTrimming.toString());
+ } else {
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ normalizedCollationName(),
+ /* language = */ null,
+ /* country = */ null,
+ /* icuVersion = */ null,
+ COLLATION_PAD_ATTRIBUTE,
+ /* accentSensitivity = */ true,
+ /* caseSensitivity = */ false,
+ spaceTrimming.toString());
+ }
+ }
+
+ /**
+ * Compute normalized collation name. Components of collation name are given in order:
+ * - Base collation name (UTF8_BINARY or UTF8_LCASE)
+ * - Optional space trimming when non-default preceded by underscore
+ * Examples: UTF8_BINARY, UTF8_LCASE, UTF8_BINARY_RTRIM, UTF8_LCASE_RTRIM.
+ */
+ @Override
+ protected String normalizedCollationName() {
+ StringBuilder builder = new StringBuilder();
+ if(caseSensitivity == CaseSensitivity.UNSPECIFIED){
+ builder.append(UTF8_BINARY_COLLATION_NAME);
+ } else{
+ builder.append(UTF8_LCASE_COLLATION_NAME);
+ }
+ if (spaceTrimming != SpaceTrimming.NONE) {
+ builder.append('_');
+ builder.append(spaceTrimming.toString());
+ }
+ return builder.toString();
+ }
+
+ static List listCollations() {
+ CollationIdentifier UTF8_BINARY_COLLATION_IDENT = new CollationIdentifier(
+ PROVIDER_SPARK,
+ UTF8_BINARY_COLLATION_NAME,
+ CollationSpecICU.ICU_VERSION
+ );
+ CollationIdentifier UTF8_LCASE_COLLATION_IDENT = new CollationIdentifier(
+ PROVIDER_SPARK,
+ UTF8_LCASE_COLLATION_NAME,
+ CollationSpecICU.ICU_VERSION
+ );
+ return Arrays.asList(UTF8_BINARY_COLLATION_IDENT, UTF8_LCASE_COLLATION_IDENT);
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier collationIdentifier) {
+ try {
+ int collationId = CollationSpecUTF8.collationNameToId(
+ collationIdentifier.name, collationIdentifier.name.toUpperCase());
+ return CollationSpecUTF8.fromCollationId(collationId).buildCollationMeta();
+ } catch (SparkException ignored) {
+ // ignore
+ return null;
}
}
}
@@ -489,9 +763,11 @@ private enum AccentSensitivity {
private static final Map ICULocaleToId = new HashMap<>();
/**
- * ICU library Collator version passed to `Collation` instance.
+ * ICU library version.
*/
- private static final String ICU_COLLATOR_VERSION = "153.120.0.0";
+ private static final String ICU_VERSION = String.format("%d.%d",
+ VersionInfo.ICU_VERSION.getMajor(),
+ VersionInfo.ICU_VERSION.getMinor());
static {
ICULocaleMap.put("UNICODE", ULocale.ROOT);
@@ -541,21 +817,33 @@ private enum AccentSensitivity {
}
}
- private static final int UNICODE_COLLATION_ID =
- new CollationSpecICU("UNICODE", CaseSensitivity.CS, AccentSensitivity.AS).collationId;
- private static final int UNICODE_CI_COLLATION_ID =
- new CollationSpecICU("UNICODE", CaseSensitivity.CI, AccentSensitivity.AS).collationId;
+ private static final int UNICODE_COLLATION_ID = new CollationSpecICU(
+ "UNICODE",
+ CaseSensitivity.CS,
+ AccentSensitivity.AS,
+ SpaceTrimming.NONE).collationId;
+
+ private static final int UNICODE_CI_COLLATION_ID = new CollationSpecICU(
+ "UNICODE",
+ CaseSensitivity.CI,
+ AccentSensitivity.AS,
+ SpaceTrimming.NONE).collationId;
private final CaseSensitivity caseSensitivity;
private final AccentSensitivity accentSensitivity;
+ private final SpaceTrimming spaceTrimming;
private final String locale;
private final int collationId;
- private CollationSpecICU(String locale, CaseSensitivity caseSensitivity,
- AccentSensitivity accentSensitivity) {
+ private CollationSpecICU(
+ String locale,
+ CaseSensitivity caseSensitivity,
+ AccentSensitivity accentSensitivity,
+ SpaceTrimming spaceTrimming) {
this.locale = locale;
this.caseSensitivity = caseSensitivity;
this.accentSensitivity = accentSensitivity;
+ this.spaceTrimming = spaceTrimming;
// Construct collation ID from locale, case-sensitivity and accent-sensitivity specifiers.
int collationId = ICULocaleToId.get(locale);
// Mandatory ICU implementation provider.
@@ -565,6 +853,8 @@ private CollationSpecICU(String locale, CaseSensitivity caseSensitivity,
caseSensitivity);
collationId = SpecifierUtils.setSpecValue(collationId, ACCENT_SENSITIVITY_OFFSET,
accentSensitivity);
+ collationId = SpecifierUtils.setSpecValue(collationId, SPACE_TRIMMING_OFFSET,
+ spaceTrimming);
this.collationId = collationId;
}
@@ -582,58 +872,86 @@ private static int collationNameToId(
}
if (lastPos == -1) {
throw collationInvalidNameException(originalName);
- } else {
- String locale = collationName.substring(0, lastPos);
- int collationId = ICULocaleToId.get(ICULocaleMapUppercase.get(locale));
-
- // Try all combinations of AS/AI and CS/CI.
- CaseSensitivity caseSensitivity;
- AccentSensitivity accentSensitivity;
- if (collationName.equals(locale) ||
- collationName.equals(locale + "_AS") ||
- collationName.equals(locale + "_CS") ||
- collationName.equals(locale + "_AS_CS") ||
- collationName.equals(locale + "_CS_AS")
- ) {
- caseSensitivity = CaseSensitivity.CS;
- accentSensitivity = AccentSensitivity.AS;
- } else if (collationName.equals(locale + "_CI") ||
- collationName.equals(locale + "_AS_CI") ||
- collationName.equals(locale + "_CI_AS")) {
- caseSensitivity = CaseSensitivity.CI;
- accentSensitivity = AccentSensitivity.AS;
- } else if (collationName.equals(locale + "_AI") ||
- collationName.equals(locale + "_CS_AI") ||
- collationName.equals(locale + "_AI_CS")) {
- caseSensitivity = CaseSensitivity.CS;
- accentSensitivity = AccentSensitivity.AI;
- } else if (collationName.equals(locale + "_AI_CI") ||
- collationName.equals(locale + "_CI_AI")) {
- caseSensitivity = CaseSensitivity.CI;
- accentSensitivity = AccentSensitivity.AI;
- } else {
- throw collationInvalidNameException(originalName);
- }
+ }
+ String locale = collationName.substring(0, lastPos);
+ int collationId = ICULocaleToId.get(ICULocaleMapUppercase.get(locale));
+ collationId = SpecifierUtils.setSpecValue(collationId,
+ IMPLEMENTATION_PROVIDER_OFFSET, ImplementationProvider.ICU);
- // Build collation ID from computed specifiers.
- collationId = SpecifierUtils.setSpecValue(collationId,
- IMPLEMENTATION_PROVIDER_OFFSET, ImplementationProvider.ICU);
- collationId = SpecifierUtils.setSpecValue(collationId,
- CASE_SENSITIVITY_OFFSET, caseSensitivity);
- collationId = SpecifierUtils.setSpecValue(collationId,
- ACCENT_SENSITIVITY_OFFSET, accentSensitivity);
+ // No other specifiers present.
+ if(collationName.equals(locale)){
return collationId;
}
+ if(collationName.charAt(locale.length()) != '_'){
+ throw collationInvalidNameException(originalName);
+ }
+ // Extract remaining specifiers and trim "_" separator.
+ String remainingSpecifiers = collationName.substring(lastPos + 1);
+
+ // Initialize default specifier flags.
+ // Case sensitive, accent sensitive, no space trimming.
+ boolean isCaseSpecifierSet = false;
+ boolean isAccentSpecifierSet = false;
+ boolean isSpaceTrimmingSpecifierSet = false;
+ CaseSensitivity caseSensitivity = CaseSensitivity.CS;
+ AccentSensitivity accentSensitivity = AccentSensitivity.AS;
+ SpaceTrimming spaceTrimming = SpaceTrimming.NONE;
+
+ String[] specifiers = remainingSpecifiers.split("_");
+
+ // Iterate through specifiers and set corresponding flags
+ for (String specifier : specifiers) {
+ switch (specifier) {
+ case "CI":
+ case "CS":
+ if (isCaseSpecifierSet) {
+ throw collationInvalidNameException(originalName);
+ }
+ caseSensitivity = CaseSensitivity.valueOf(specifier);
+ isCaseSpecifierSet = true;
+ break;
+ case "AI":
+ case "AS":
+ if (isAccentSpecifierSet) {
+ throw collationInvalidNameException(originalName);
+ }
+ accentSensitivity = AccentSensitivity.valueOf(specifier);
+ isAccentSpecifierSet = true;
+ break;
+ case "RTRIM":
+ if (isSpaceTrimmingSpecifierSet) {
+ throw collationInvalidNameException(originalName);
+ }
+ spaceTrimming = SpaceTrimming.valueOf(specifier);
+ isSpaceTrimmingSpecifierSet = true;
+ break;
+ default:
+ throw collationInvalidNameException(originalName);
+ }
+ }
+
+ // Build collation ID from computed specifiers.
+ collationId = SpecifierUtils.setSpecValue(collationId,
+ CASE_SENSITIVITY_OFFSET, caseSensitivity);
+ collationId = SpecifierUtils.setSpecValue(collationId,
+ ACCENT_SENSITIVITY_OFFSET, accentSensitivity);
+ collationId = SpecifierUtils.setSpecValue(collationId,
+ SPACE_TRIMMING_OFFSET, spaceTrimming);
+ return collationId;
}
private static CollationSpecICU fromCollationId(int collationId) {
// Parse specifiers from collation ID.
+ int spaceTrimmingOrdinal = SpecifierUtils.getSpecValue(collationId,
+ SPACE_TRIMMING_OFFSET, SPACE_TRIMMING_MASK);
int caseSensitivityOrdinal = SpecifierUtils.getSpecValue(collationId,
CASE_SENSITIVITY_OFFSET, CASE_SENSITIVITY_MASK);
int accentSensitivityOrdinal = SpecifierUtils.getSpecValue(collationId,
ACCENT_SENSITIVITY_OFFSET, ACCENT_SENSITIVITY_MASK);
collationId = SpecifierUtils.removeSpec(collationId,
IMPLEMENTATION_PROVIDER_OFFSET, IMPLEMENTATION_PROVIDER_MASK);
+ collationId = SpecifierUtils.removeSpec(collationId,
+ SPACE_TRIMMING_OFFSET, SPACE_TRIMMING_MASK);
collationId = SpecifierUtils.removeSpec(collationId,
CASE_SENSITIVITY_OFFSET, CASE_SENSITIVITY_MASK);
collationId = SpecifierUtils.removeSpec(collationId,
@@ -644,8 +962,9 @@ private static CollationSpecICU fromCollationId(int collationId) {
assert(localeId >= 0 && localeId < ICULocaleNames.length);
CaseSensitivity caseSensitivity = CaseSensitivity.values()[caseSensitivityOrdinal];
AccentSensitivity accentSensitivity = AccentSensitivity.values()[accentSensitivityOrdinal];
+ SpaceTrimming spaceTrimming = SpaceTrimming.values()[spaceTrimmingOrdinal];
String locale = ICULocaleNames[localeId];
- return new CollationSpecICU(locale, caseSensitivity, accentSensitivity);
+ return new CollationSpecICU(locale, caseSensitivity, accentSensitivity, spaceTrimming);
}
@Override
@@ -672,16 +991,51 @@ protected Collation buildCollation() {
Collator collator = Collator.getInstance(resultLocale);
// Freeze ICU collator to ensure thread safety.
collator.freeze();
+
+ Comparator comparator;
+ ToLongFunction hashFunction;
+
+ if (spaceTrimming == SpaceTrimming.NONE) {
+ hashFunction = s -> (long) collator.getCollationKey(
+ s.toValidString()).hashCode();
+ comparator = (s1, s2) ->
+ collator.compare(s1.toValidString(), s2.toValidString());
+ } else {
+ comparator = (s1, s2) -> collator.compare(
+ applyTrimmingPolicy(s1, spaceTrimming).toValidString(),
+ applyTrimmingPolicy(s2, spaceTrimming).toValidString());
+ hashFunction = s -> (long) collator.getCollationKey(
+ applyTrimmingPolicy(s, spaceTrimming).toValidString()).hashCode();
+ }
+
return new Collation(
- collationName(),
+ normalizedCollationName(),
PROVIDER_ICU,
collator,
- (s1, s2) -> collator.compare(s1.toString(), s2.toString()),
- ICU_COLLATOR_VERSION,
- s -> (long) collator.getCollationKey(s.toString()).hashCode(),
- /* supportsBinaryEquality = */ false,
- /* supportsBinaryOrdering = */ false,
- /* supportsLowercaseEquality = */ false);
+ comparator,
+ ICU_VERSION,
+ hashFunction,
+ (s1, s2) -> comparator.compare(s1, s2) == 0,
+ /* isUtf8BinaryType = */ false,
+ /* isUtf8LcaseType = */ false,
+ spaceTrimming != SpaceTrimming.NONE);
+ }
+
+ @Override
+ protected CollationMeta buildCollationMeta() {
+ String language = ICULocaleMap.get(locale).getDisplayLanguage();
+ String country = ICULocaleMap.get(locale).getDisplayCountry();
+ return new CollationMeta(
+ CATALOG,
+ SCHEMA,
+ normalizedCollationName(),
+ language.isEmpty() ? null : language,
+ country.isEmpty() ? null : country,
+ VersionInfo.ICU_VERSION.toString(),
+ COLLATION_PAD_ATTRIBUTE,
+ accentSensitivity == AccentSensitivity.AS,
+ caseSensitivity == CaseSensitivity.CS,
+ spaceTrimming.toString());
}
/**
@@ -689,9 +1043,11 @@ protected Collation buildCollation() {
* - Locale name
* - Optional case sensitivity when non-default preceded by underscore
* - Optional accent sensitivity when non-default preceded by underscore
- * Examples: en, en_USA_CI_AI, sr_Cyrl_SRB_AI.
+ * - Optional space trimming when non-default preceded by underscore
+ * Examples: en, en_USA_CI_RTRIM, en_USA_CI_AI, en_USA_CI_AI_RTRIM, sr_Cyrl_SRB_AI.
*/
- private String collationName() {
+ @Override
+ protected String normalizedCollationName() {
StringBuilder builder = new StringBuilder();
builder.append(locale);
if (caseSensitivity != CaseSensitivity.CS) {
@@ -702,8 +1058,40 @@ private String collationName() {
builder.append('_');
builder.append(accentSensitivity.toString());
}
+ if(spaceTrimming != SpaceTrimming.NONE) {
+ builder.append('_');
+ builder.append(spaceTrimming.toString());
+ }
return builder.toString();
}
+
+ private static List allCollationNames() {
+ List collationNames = new ArrayList<>();
+ List caseAccentSpecifiers = Arrays.asList("", "_AI", "_CI", "_CI_AI");
+ for (String locale : ICULocaleToId.keySet()) {
+ for (String caseAccent : caseAccentSpecifiers) {
+ String collationName = locale + caseAccent;
+ collationNames.add(collationName);
+ }
+ }
+ return collationNames.stream().sorted().toList();
+ }
+
+ static List listCollations() {
+ return allCollationNames().stream().map(name ->
+ new CollationIdentifier(PROVIDER_ICU, name, VersionInfo.ICU_VERSION.toString())).toList();
+ }
+
+ static CollationMeta loadCollationMeta(CollationIdentifier collationIdentifier) {
+ try {
+ int collationId = CollationSpecICU.collationNameToId(
+ collationIdentifier.name, collationIdentifier.name.toUpperCase());
+ return CollationSpecICU.fromCollationId(collationId).buildCollationMeta();
+ } catch (SparkException ignored) {
+ // ignore
+ return null;
+ }
+ }
}
/**
@@ -730,9 +1118,12 @@ public CollationIdentifier identifier() {
}
}
+ public static final String CATALOG = "SYSTEM";
+ public static final String SCHEMA = "BUILTIN";
public static final String PROVIDER_SPARK = "spark";
public static final String PROVIDER_ICU = "icu";
public static final List SUPPORTED_PROVIDERS = List.of(PROVIDER_SPARK, PROVIDER_ICU);
+ public static final String COLLATION_PAD_ATTRIBUTE = "NO_PAD";
public static final int UTF8_BINARY_COLLATION_ID =
Collation.CollationSpecUTF8.UTF8_BINARY_COLLATION_ID;
@@ -749,12 +1140,15 @@ public CollationIdentifier identifier() {
* Returns a StringSearch object for the given pattern and target strings, under collation
* rules corresponding to the given collationId. The external ICU library StringSearch object can
* be used to find occurrences of the pattern in the target string, while respecting collation.
+ * When given invalid UTF8Strings, the method will first convert them to valid strings, and then
+ * instantiate the StringSearch object. However, original UTF8Strings will remain unchanged.
*/
public static StringSearch getStringSearch(
final UTF8String targetUTF8String,
final UTF8String patternUTF8String,
final int collationId) {
- return getStringSearch(targetUTF8String.toString(), patternUTF8String.toString(), collationId);
+ return getStringSearch(targetUTF8String.toValidString(), patternUTF8String.toValidString(),
+ collationId);
}
/**
@@ -763,9 +1157,9 @@ public static StringSearch getStringSearch(
* be used to find occurrences of the pattern in the target string, while respecting collation.
*/
public static StringSearch getStringSearch(
- final String targetString,
- final String patternString,
- final int collationId) {
+ final String targetString,
+ final String patternString,
+ final int collationId) {
CharacterIterator target = new StringCharacterIterator(targetString);
Collator collator = CollationFactory.fetchCollation(collationId).collator;
return new StringSearch(patternString, target, (RuleBasedCollator) collator);
@@ -775,11 +1169,13 @@ public static StringSearch getStringSearch(
* Returns a collation-unaware StringSearch object for the given pattern and target strings.
* While this object does not respect collation, it can be used to find occurrences of the pattern
* in the target string for UTF8_BINARY or UTF8_LCASE (if arguments are lowercased).
+ * When given invalid UTF8Strings, the method will first convert them to valid strings, and then
+ * instantiate the StringSearch object. However, original UTF8Strings will remain unchanged.
*/
public static StringSearch getStringSearch(
- final UTF8String targetUTF8String,
- final UTF8String patternUTF8String) {
- return new StringSearch(patternUTF8String.toString(), targetUTF8String.toString());
+ final UTF8String targetUTF8String,
+ final UTF8String patternUTF8String) {
+ return new StringSearch(patternUTF8String.toValidString(), targetUTF8String.toValidString());
}
/**
@@ -789,6 +1185,16 @@ public static int collationNameToId(String collationName) throws SparkException
return Collation.CollationSpec.collationNameToId(collationName);
}
+ public static boolean isCaseInsensitive(int collationId) {
+ return Collation.CollationSpecICU.fromCollationId(collationId).caseSensitivity ==
+ Collation.CollationSpecICU.CaseSensitivity.CI;
+ }
+
+ public static boolean isAccentInsensitive(int collationId) {
+ return Collation.CollationSpecICU.fromCollationId(collationId).accentSensitivity ==
+ Collation.CollationSpecICU.AccentSensitivity.AI;
+ }
+
public static void assertValidProvider(String provider) throws SparkException {
if (!SUPPORTED_PROVIDERS.contains(provider.toLowerCase())) {
Map params = Map.of(
@@ -813,26 +1219,52 @@ public static String[] getICULocaleNames() {
return Collation.CollationSpecICU.ICULocaleNames;
}
+ /**
+ * Applies the trimming policy corresponding to the given trim collation type.
+ */
+ public static UTF8String applyTrimmingPolicy(UTF8String input, int collationId) {
+ return Collation.CollationSpec.applyTrimmingPolicy(input, collationId);
+ }
+
+ /**
+ * Returns if leading/trailing spaces should be ignored in trim string expressions. This is needed
+ * because space trimming collation directly changes behaviour of trim functions.
+ */
+ public static boolean ignoresSpacesInTrimFunctions(
+ int collationId,
+ boolean isLTrim,
+ boolean isRTrim) {
+ return Collation.CollationSpec.ignoresSpacesInTrimFunctions(collationId, isLTrim, isRTrim);
+ }
+
public static UTF8String getCollationKey(UTF8String input, int collationId) {
Collation collation = fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ input = Collation.CollationSpec.applyTrimmingPolicy(input, collationId);
+ }
+ if (collation.isUtf8BinaryType) {
return input;
- } else if (collation.supportsLowercaseEquality) {
- return input.toLowerCase();
+ } else if (collation.isUtf8LcaseType) {
+ return CollationAwareUTF8String.lowerCaseCodePoints(input);
} else {
- CollationKey collationKey = collation.collator.getCollationKey(input.toString());
+ CollationKey collationKey = collation.collator.getCollationKey(
+ input.toValidString());
return UTF8String.fromBytes(collationKey.toByteArray());
}
}
public static byte[] getCollationKeyBytes(UTF8String input, int collationId) {
Collation collation = fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ input = Collation.CollationSpec.applyTrimmingPolicy(input, collationId);
+ }
+ if (collation.isUtf8BinaryType) {
return input.getBytes();
- } else if (collation.supportsLowercaseEquality) {
- return input.toLowerCase().getBytes();
+ } else if (collation.isUtf8LcaseType) {
+ return CollationAwareUTF8String.lowerCaseCodePoints(input).getBytes();
} else {
- return collation.collator.getCollationKey(input.toString()).toByteArray();
+ return collation.collator.getCollationKey(
+ input.toValidString()).toByteArray();
}
}
@@ -848,19 +1280,26 @@ public static String getClosestSuggestionsOnInvalidName(
Collation.CollationSpecUTF8.UTF8_BINARY_COLLATION.collationName,
Collation.CollationSpecUTF8.UTF8_LCASE_COLLATION.collationName
};
- validModifiers = new String[0];
+ validModifiers = new String[]{"_RTRIM"};
} else {
validRootNames = getICULocaleNames();
- validModifiers = new String[]{"_CI", "_AI", "_CS", "_AS"};
+ validModifiers = new String[]{"_CI", "_AI", "_CS", "_AS", "_RTRIM"};
}
// Split modifiers and locale name.
- final int MODIFIER_LENGTH = 3;
+ boolean foundModifier = true;
String localeName = collationName.toUpperCase();
List modifiers = new ArrayList<>();
- while (Arrays.stream(validModifiers).anyMatch(localeName::endsWith)) {
- modifiers.add(localeName.substring(localeName.length() - MODIFIER_LENGTH));
- localeName = localeName.substring(0, localeName.length() - MODIFIER_LENGTH);
+ while (foundModifier) {
+ foundModifier = false;
+ for (String modifier : validModifiers) {
+ if (localeName.endsWith(modifier)) {
+ modifiers.add(modifier);
+ localeName = localeName.substring(0, localeName.length() - modifier.length());
+ foundModifier = true;
+ break;
+ }
+ }
}
// Suggest version with unique modifiers.
@@ -918,4 +1357,12 @@ public static String getClosestSuggestionsOnInvalidName(
return String.join(", ", suggestions);
}
+
+ public static List listCollations() {
+ return Collation.CollationSpec.listCollations();
+ }
+
+ public static CollationMeta loadCollationMeta(CollationIdentifier collationIdentifier) {
+ return Collation.CollationSpec.loadCollationMeta(collationIdentifier);
+ }
}
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java
index 453423ddbc33d..135250e482b16 100644
--- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java
@@ -20,8 +20,6 @@
import org.apache.spark.unsafe.types.UTF8String;
-import java.util.ArrayList;
-import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
@@ -37,88 +35,67 @@ public final class CollationSupport {
*/
public static class StringSplitSQL {
- public static UTF8String[] exec(final UTF8String s, final UTF8String d, final int collationId) {
+ public static UTF8String[] exec(final UTF8String s, UTF8String d, final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ d = CollationFactory.applyTrimmingPolicy(d, collationId);
+ }
+ if (collation.isUtf8BinaryType) {
return execBinary(s, d);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(s, d);
} else {
return execICU(s, d, collationId);
}
}
public static String genCode(final String s, final String d, final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.StringSplitSQL.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", s, d);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", s, d);
} else {
- return String.format(expr + "ICU(%s, %s, %d)", s, d, collationId);
+ return String.format(expr + "(%s, %s, %d)", s, d, collationId);
}
}
public static UTF8String[] execBinary(final UTF8String string, final UTF8String delimiter) {
return string.splitSQL(delimiter, -1);
}
public static UTF8String[] execLowercase(final UTF8String string, final UTF8String delimiter) {
- if (delimiter.numBytes() == 0) return new UTF8String[] { string };
- if (string.numBytes() == 0) return new UTF8String[] { UTF8String.EMPTY_UTF8 };
- Pattern pattern = Pattern.compile(Pattern.quote(delimiter.toString()),
- CollationSupport.lowercaseRegexFlags);
- String[] splits = pattern.split(string.toString(), -1);
- UTF8String[] res = new UTF8String[splits.length];
- for (int i = 0; i < res.length; i++) {
- res[i] = UTF8String.fromString(splits[i]);
- }
- return res;
+ return CollationAwareUTF8String.lowercaseSplitSQL(string, delimiter, -1);
}
public static UTF8String[] execICU(final UTF8String string, final UTF8String delimiter,
final int collationId) {
- if (delimiter.numBytes() == 0) return new UTF8String[] { string };
- if (string.numBytes() == 0) return new UTF8String[] { UTF8String.EMPTY_UTF8 };
- List strings = new ArrayList<>();
- String target = string.toString(), pattern = delimiter.toString();
- StringSearch stringSearch = CollationFactory.getStringSearch(target, pattern, collationId);
- int start = 0, end;
- while ((end = stringSearch.next()) != StringSearch.DONE) {
- strings.add(UTF8String.fromString(target.substring(start, end)));
- start = end + stringSearch.getMatchLength();
- }
- if (start <= target.length()) {
- strings.add(UTF8String.fromString(target.substring(start)));
- }
- return strings.toArray(new UTF8String[0]);
+ return CollationAwareUTF8String.icuSplitSQL(string, delimiter, -1, collationId);
}
}
public static class Contains {
- public static boolean exec(final UTF8String l, final UTF8String r, final int collationId) {
+ public static boolean exec(UTF8String l, UTF8String r, final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ l = CollationFactory.applyTrimmingPolicy(l, collationId);
+ r = CollationFactory.applyTrimmingPolicy(r, collationId);
+ }
+ if (collation.isUtf8BinaryType) {
return execBinary(l, r);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(l, r);
} else {
return execICU(l, r, collationId);
}
}
public static String genCode(final String l, final String r, final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.Contains.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", l, r);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", l, r);
} else {
- return String.format(expr + "ICU(%s, %s, %d)", l, r, collationId);
+ return String.format(expr + "(%s, %s, %d)", l, r, collationId);
}
}
public static boolean execBinary(final UTF8String l, final UTF8String r) {
return l.contains(r);
}
public static boolean execLowercase(final UTF8String l, final UTF8String r) {
- return CollationAwareUTF8String.lowercaseIndexOf(l, r, 0) >= 0;
+ return CollationAwareUTF8String.lowercaseContains(l, r);
}
public static boolean execICU(final UTF8String l, final UTF8String r,
final int collationId) {
@@ -130,33 +107,35 @@ public static boolean execICU(final UTF8String l, final UTF8String r,
}
public static class StartsWith {
- public static boolean exec(final UTF8String l, final UTF8String r,
+ public static boolean exec(UTF8String l, UTF8String r,
final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ l = CollationFactory.applyTrimmingPolicy(l, collationId);
+ r = CollationFactory.applyTrimmingPolicy(r, collationId);
+ }
+
+ if (collation.isUtf8BinaryType) {
return execBinary(l, r);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(l, r);
} else {
return execICU(l, r, collationId);
}
}
public static String genCode(final String l, final String r, final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.StartsWith.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", l, r);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", l, r);
} else {
- return String.format(expr + "ICU(%s, %s, %d)", l, r, collationId);
+ return String.format(expr + "(%s, %s, %d)", l, r, collationId);
}
}
public static boolean execBinary(final UTF8String l, final UTF8String r) {
return l.startsWith(r);
}
public static boolean execLowercase(final UTF8String l, final UTF8String r) {
- return CollationAwareUTF8String.lowercaseMatchFrom(l, r.toLowerCase(), 0);
+ return CollationAwareUTF8String.lowercaseStartsWith(l, r);
}
public static boolean execICU(final UTF8String l, final UTF8String r,
final int collationId) {
@@ -168,32 +147,33 @@ public static boolean execICU(final UTF8String l, final UTF8String r,
}
public static class EndsWith {
- public static boolean exec(final UTF8String l, final UTF8String r, final int collationId) {
+ public static boolean exec(UTF8String l, UTF8String r, final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ l = CollationFactory.applyTrimmingPolicy(l, collationId);
+ r = CollationFactory.applyTrimmingPolicy(r, collationId);
+ }
+ if (collation.isUtf8BinaryType) {
return execBinary(l, r);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(l, r);
} else {
return execICU(l, r, collationId);
}
}
public static String genCode(final String l, final String r, final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.EndsWith.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", l, r);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", l, r);
} else {
- return String.format(expr + "ICU(%s, %s, %d)", l, r, collationId);
+ return String.format(expr + "(%s, %s, %d)", l, r, collationId);
}
}
public static boolean execBinary(final UTF8String l, final UTF8String r) {
return l.endsWith(r);
}
public static boolean execLowercase(final UTF8String l, final UTF8String r) {
- return CollationAwareUTF8String.lowercaseMatchUntil(l, r.toLowerCase(), l.numChars());
+ return CollationAwareUTF8String.lowercaseEndsWith(l, r);
}
public static boolean execICU(final UTF8String l, final UTF8String r,
final int collationId) {
@@ -208,9 +188,10 @@ public static boolean execICU(final UTF8String l, final UTF8String r,
public static class Upper {
public static UTF8String exec(final UTF8String v, final int collationId, boolean useICU) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ // Space trimming does not affect the output of this expression.
+ if (collation.isUtf8BinaryType) {
return useICU ? execBinaryICU(v) : execBinary(v);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(v);
} else {
return execICU(v, collationId);
@@ -219,10 +200,10 @@ public static UTF8String exec(final UTF8String v, final int collationId, boolean
public static String genCode(final String v, final int collationId, boolean useICU) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.Upper.exec";
- if (collation.supportsBinaryEquality) {
+ if (collation.isUtf8BinaryType) {
String funcName = useICU ? "BinaryICU" : "Binary";
return String.format(expr + "%s(%s)", funcName, v);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return String.format(expr + "Lowercase(%s)", v);
} else {
return String.format(expr + "ICU(%s, %d)", v, collationId);
@@ -245,9 +226,10 @@ public static UTF8String execICU(final UTF8String v, final int collationId) {
public static class Lower {
public static UTF8String exec(final UTF8String v, final int collationId, boolean useICU) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ // Space trimming does not affect the output of this expression.
+ if (collation.isUtf8BinaryType) {
return useICU ? execBinaryICU(v) : execBinary(v);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(v);
} else {
return execICU(v, collationId);
@@ -256,10 +238,10 @@ public static UTF8String exec(final UTF8String v, final int collationId, boolean
public static String genCode(final String v, final int collationId, boolean useICU) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.Lower.exec";
- if (collation.supportsBinaryEquality) {
+ if (collation.isUtf8BinaryType) {
String funcName = useICU ? "BinaryICU" : "Binary";
return String.format(expr + "%s(%s)", funcName, v);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return String.format(expr + "Lowercase(%s)", v);
} else {
return String.format(expr + "ICU(%s, %d)", v, collationId);
@@ -282,9 +264,10 @@ public static UTF8String execICU(final UTF8String v, final int collationId) {
public static class InitCap {
public static UTF8String exec(final UTF8String v, final int collationId, boolean useICU) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ // Space trimming does not affect the output of this expression.
+ if (collation.isUtf8BinaryType) {
return useICU ? execBinaryICU(v) : execBinary(v);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(v);
} else {
return execICU(v, collationId);
@@ -294,10 +277,10 @@ public static UTF8String exec(final UTF8String v, final int collationId, boolean
public static String genCode(final String v, final int collationId, boolean useICU) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.InitCap.exec";
- if (collation.supportsBinaryEquality) {
+ if (collation.isUtf8BinaryType) {
String funcName = useICU ? "BinaryICU" : "Binary";
return String.format(expr + "%s(%s)", funcName, v);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return String.format(expr + "Lowercase(%s)", v);
} else {
return String.format(expr + "ICU(%s, %d)", v, collationId);
@@ -307,7 +290,7 @@ public static UTF8String execBinary(final UTF8String v) {
return v.toLowerCase().toTitleCase();
}
public static UTF8String execBinaryICU(final UTF8String v) {
- return CollationAwareUTF8String.toLowerCase(v).toTitleCaseICU();
+ return CollationAwareUTF8String.toTitleCaseICU(v);
}
public static UTF8String execLowercase(final UTF8String v) {
return CollationAwareUTF8String.toTitleCase(v);
@@ -319,17 +302,16 @@ public static UTF8String execICU(final UTF8String v, final int collationId) {
public static class FindInSet {
public static int exec(final UTF8String word, final UTF8String set, final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ // FindInSet handles space-trimming collations here, as its comparison is space-trimming aware.
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return execBinary(word, set);
} else {
return execCollationAware(word, set, collationId);
}
}
public static String genCode(final String word, final String set, final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.FindInSet.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", word, set);
} else {
return String.format(expr + "CollationAware(%s, %s, %d)", word, set, collationId);
@@ -345,12 +327,15 @@ public static int execCollationAware(final UTF8String word, final UTF8String set
}
public static class StringInstr {
- public static int exec(final UTF8String string, final UTF8String substring,
+ public static int exec(final UTF8String string, UTF8String substring,
final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ substring = CollationFactory.applyTrimmingPolicy(substring, collationId);
+ }
+ if (collation.isUtf8BinaryType) {
return execBinary(string, substring);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(string, substring);
} else {
return execICU(string, substring, collationId);
@@ -358,14 +343,11 @@ public static int exec(final UTF8String string, final UTF8String substring,
}
public static String genCode(final String string, final String substring,
final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.StringInstr.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", string, substring);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", string, substring);
} else {
- return String.format(expr + "ICU(%s, %s, %d)", string, substring, collationId);
+ return String.format(expr + "(%s, %s, %d)", string, substring, collationId);
}
}
public static int execBinary(final UTF8String string, final UTF8String substring) {
@@ -384,9 +366,10 @@ public static class StringReplace {
public static UTF8String exec(final UTF8String src, final UTF8String search,
final UTF8String replace, final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ // Space trimming does not affect the output of this expression.
+ if (collation.isUtf8BinaryType) {
return execBinary(src, search, replace);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(src, search, replace);
} else {
return execICU(src, search, replace, collationId);
@@ -396,9 +379,9 @@ public static String genCode(final String src, final String search, final String
final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.StringReplace.exec";
- if (collation.supportsBinaryEquality) {
+ if (collation.isUtf8BinaryType) {
return String.format(expr + "Binary(%s, %s, %s)", src, search, replace);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return String.format(expr + "Lowercase(%s, %s, %s)", src, search, replace);
} else {
return String.format(expr + "ICU(%s, %s, %s, %d)", src, search, replace, collationId);
@@ -419,12 +402,15 @@ public static UTF8String execICU(final UTF8String src, final UTF8String search,
}
public static class StringLocate {
- public static int exec(final UTF8String string, final UTF8String substring, final int start,
+ public static int exec(final UTF8String string, UTF8String substring, final int start,
final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ substring = CollationFactory.applyTrimmingPolicy(substring, collationId);
+ }
+ if (collation.isUtf8BinaryType) {
return execBinary(string, substring, start);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(string, substring, start);
} else {
return execICU(string, substring, start, collationId);
@@ -432,14 +418,11 @@ public static int exec(final UTF8String string, final UTF8String substring, fina
}
public static String genCode(final String string, final String substring, final int start,
final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.StringLocate.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s, %d)", string, substring, start);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s, %d)", string, substring, start);
} else {
- return String.format(expr + "ICU(%s, %s, %d, %d)", string, substring, start, collationId);
+ return String.format(expr + "(%s, %s, %d, %d)", string, substring, start, collationId);
}
}
public static int execBinary(final UTF8String string, final UTF8String substring,
@@ -457,27 +440,27 @@ public static int execICU(final UTF8String string, final UTF8String substring, f
}
public static class SubstringIndex {
- public static UTF8String exec(final UTF8String string, final UTF8String delimiter,
+ public static UTF8String exec(final UTF8String string, UTF8String delimiter,
final int count, final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.supportsSpaceTrimming) {
+ delimiter = CollationFactory.applyTrimmingPolicy(delimiter, collationId);
+ }
+ if (collation.isUtf8BinaryType) {
return execBinary(string, delimiter, count);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(string, delimiter, count);
} else {
return execICU(string, delimiter, count, collationId);
}
}
public static String genCode(final String string, final String delimiter,
- final int count, final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
+ final String count, final int collationId) {
String expr = "CollationSupport.SubstringIndex.exec";
- if (collation.supportsBinaryEquality) {
- return String.format(expr + "Binary(%s, %s, %d)", string, delimiter, count);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s, %d)", string, delimiter, count);
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
+ return String.format(expr + "Binary(%s, %s, %s)", string, delimiter, count);
} else {
- return String.format(expr + "ICU(%s, %s, %d, %d)", string, delimiter, count, collationId);
+ return String.format(expr + "(%s, %s, %s, %d)", string, delimiter, count, collationId);
}
}
public static UTF8String execBinary(final UTF8String string, final UTF8String delimiter,
@@ -490,8 +473,7 @@ public static UTF8String execLowercase(final UTF8String string, final UTF8String
}
public static UTF8String execICU(final UTF8String string, final UTF8String delimiter,
final int count, final int collationId) {
- return CollationAwareUTF8String.subStringIndex(string, delimiter, count,
- collationId);
+ return CollationAwareUTF8String.subStringIndex(string, delimiter, count, collationId);
}
}
@@ -499,25 +481,15 @@ public static class StringTranslate {
public static UTF8String exec(final UTF8String source, Map dict,
final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ // Space trimming does not affect the output of this expression.
+ if (collation.isUtf8BinaryType) {
return execBinary(source, dict);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(source, dict);
} else {
return execICU(source, dict, collationId);
}
}
- public static String genCode(final String source, final String dict, final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- String expr = "CollationSupport.EndsWith.exec";
- if (collation.supportsBinaryEquality) {
- return String.format(expr + "Binary(%s, %s)", source, dict);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", source, dict);
- } else {
- return String.format(expr + "ICU(%s, %s, %d)", source, dict, collationId);
- }
- }
public static UTF8String execBinary(final UTF8String source, Map dict) {
return source.translate(dict);
}
@@ -539,10 +511,15 @@ public static UTF8String exec(
final UTF8String trimString,
final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.isUtf8BinaryType && !collation.supportsSpaceTrimming) {
return execBinary(srcString, trimString);
- } else if (collation.supportsLowercaseEquality) {
- return execLowercase(srcString, trimString);
+ }
+
+ if (collation.isUtf8BinaryType) {
+ // special handling needed for utf8_binary_rtrim collation.
+ return execBinaryTrim(srcString, trimString, collationId);
+ } else if (collation.isUtf8LcaseType) {
+ return execLowercase(srcString, trimString, collationId);
} else {
return execICU(srcString, trimString, collationId);
}
@@ -554,14 +531,11 @@ public static String genCode(
final String srcString,
final String trimString,
final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.StringTrim.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", srcString, trimString);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", srcString, trimString);
} else {
- return String.format(expr + "ICU(%s, %s, %d)", srcString, trimString, collationId);
+ return String.format(expr + "(%s, %s, %d)", srcString, trimString, collationId);
}
}
public static UTF8String execBinary(
@@ -575,8 +549,9 @@ public static UTF8String execBinary(
}
public static UTF8String execLowercase(
final UTF8String srcString,
- final UTF8String trimString) {
- return CollationAwareUTF8String.lowercaseTrim(srcString, trimString);
+ final UTF8String trimString,
+ final int collationId) {
+ return CollationAwareUTF8String.lowercaseTrim(srcString, trimString, collationId);
}
public static UTF8String execICU(
final UTF8String srcString,
@@ -584,6 +559,12 @@ public static UTF8String execICU(
final int collationId) {
return CollationAwareUTF8String.trim(srcString, trimString, collationId);
}
+ public static UTF8String execBinaryTrim(
+ final UTF8String srcString,
+ final UTF8String trimString,
+ final int collationId) {
+ return CollationAwareUTF8String.binaryTrim(srcString, trimString, collationId);
+ }
}
public static class StringTrimLeft {
@@ -591,13 +572,15 @@ public static UTF8String exec(final UTF8String srcString) {
return execBinary(srcString);
}
public static UTF8String exec(
- final UTF8String srcString,
- final UTF8String trimString,
- final int collationId) {
+ final UTF8String srcString,
+ UTF8String trimString,
+ final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ // Space trimming does not affect the output of this expression, as the only currently
+ // supported space trimming is RTRIM.
+ if (collation.isUtf8BinaryType) {
return execBinary(srcString, trimString);
- } else if (collation.supportsLowercaseEquality) {
+ } else if (collation.isUtf8LcaseType) {
return execLowercase(srcString, trimString);
} else {
return execICU(srcString, trimString, collationId);
@@ -610,14 +593,11 @@ public static String genCode(
final String srcString,
final String trimString,
final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.StringTrimLeft.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", srcString, trimString);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", srcString, trimString);
} else {
- return String.format(expr + "ICU(%s, %s, %d)", srcString, trimString, collationId);
+ return String.format(expr + "(%s, %s, %d)", srcString, trimString, collationId);
}
}
public static UTF8String execBinary(final UTF8String srcString) {
@@ -650,10 +630,15 @@ public static UTF8String exec(
final UTF8String trimString,
final int collationId) {
CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
- if (collation.supportsBinaryEquality) {
+ if (collation.isUtf8BinaryType && !collation.supportsSpaceTrimming) {
return execBinary(srcString, trimString);
- } else if (collation.supportsLowercaseEquality) {
- return execLowercase(srcString, trimString);
+ }
+
+ if (collation.isUtf8BinaryType) {
+ // special handling needed for utf8_binary_rtrim collation.
+ return execBinaryTrim(srcString, trimString, collationId);
+ } else if (collation.isUtf8LcaseType) {
+ return execLowercase(srcString, trimString, collationId);
} else {
return execICU(srcString, trimString, collationId);
}
@@ -665,14 +650,11 @@ public static String genCode(
final String srcString,
final String trimString,
final int collationId) {
- CollationFactory.Collation collation = CollationFactory.fetchCollation(collationId);
String expr = "CollationSupport.StringTrimRight.exec";
- if (collation.supportsBinaryEquality) {
+ if (collationId == CollationFactory.UTF8_BINARY_COLLATION_ID) {
return String.format(expr + "Binary(%s, %s)", srcString, trimString);
- } else if (collation.supportsLowercaseEquality) {
- return String.format(expr + "Lowercase(%s, %s)", srcString, trimString);
} else {
- return String.format(expr + "ICU(%s, %s, %d)", srcString, trimString, collationId);
+ return String.format(expr + "(%s, %s, %d)", srcString, trimString, collationId);
}
}
public static UTF8String execBinary(final UTF8String srcString) {
@@ -685,8 +667,9 @@ public static UTF8String execBinary(
}
public static UTF8String execLowercase(
final UTF8String srcString,
- final UTF8String trimString) {
- return CollationAwareUTF8String.lowercaseTrimRight(srcString, trimString);
+ final UTF8String trimString,
+ final int collationId) {
+ return CollationAwareUTF8String.lowercaseTrimRight(srcString, trimString, collationId);
}
public static UTF8String execICU(
final UTF8String srcString,
@@ -694,6 +677,12 @@ public static UTF8String execICU(
final int collationId) {
return CollationAwareUTF8String.trimRight(srcString, trimString, collationId);
}
+ public static UTF8String execBinaryTrim(
+ final UTF8String srcString,
+ final UTF8String trimString,
+ final int collationId) {
+ return CollationAwareUTF8String.binaryTrimRight(srcString, trimString, collationId);
+ }
}
// TODO: Add more collation-aware string expressions.
@@ -705,10 +694,10 @@ public static UTF8String execICU(
public static boolean supportsLowercaseRegex(final int collationId) {
// for regex, only Unicode case-insensitive matching is possible,
// so UTF8_LCASE is treated as UNICODE_CI in this context
- return CollationFactory.fetchCollation(collationId).supportsLowercaseEquality;
+ return CollationFactory.fetchCollation(collationId).isUtf8LcaseType;
}
- private static final int lowercaseRegexFlags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
+ static final int lowercaseRegexFlags = Pattern.UNICODE_CASE | Pattern.CASE_INSENSITIVE;
public static int collationAwareRegexFlags(final int collationId) {
return supportsLowercaseRegex(collationId) ? lowercaseRegexFlags : 0;
}
diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/SpecialCodePointConstants.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/SpecialCodePointConstants.java
new file mode 100644
index 0000000000000..db615d747910b
--- /dev/null
+++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/SpecialCodePointConstants.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util;
+
+/**
+ * 'SpecialCodePointConstants' is introduced in order to keep the codepoints used in
+ * 'CollationAwareUTF8String' in one place.
+ */
+public class SpecialCodePointConstants {
+
+ public static final int COMBINING_DOT = 0x0307;
+ public static final int ASCII_SMALL_I = 0x0069;
+ public static final int ASCII_SPACE = 0x0020;
+ public static final int GREEK_CAPITAL_SIGMA = 0x03A3;
+ public static final int GREEK_SMALL_SIGMA = 0x03C3;
+ public static final int GREEK_FINAL_SIGMA = 0x03C2;
+ public static final int CAPITAL_I_WITH_DOT_ABOVE = 0x0130;
+}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/UTF8StringBuilder.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/UTF8StringBuilder.java
index 481ea89090b2a..3a697345ce1f5 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/UTF8StringBuilder.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/UTF8StringBuilder.java
@@ -96,4 +96,33 @@ public void appendBytes(Object base, long offset, int length) {
public UTF8String build() {
return UTF8String.fromBytes(buffer, 0, totalSize());
}
+
+ public void appendCodePoint(int codePoint) {
+ if (codePoint <= 0x7F) {
+ grow(1);
+ buffer[cursor - Platform.BYTE_ARRAY_OFFSET] = (byte) codePoint;
+ ++cursor;
+ } else if (codePoint <= 0x7FF) {
+ grow(2);
+ buffer[cursor - Platform.BYTE_ARRAY_OFFSET] = (byte) (0xC0 | (codePoint >> 6));
+ buffer[cursor + 1 - Platform.BYTE_ARRAY_OFFSET] = (byte) (0x80 | (codePoint & 0x3F));
+ cursor += 2;
+ } else if (codePoint <= 0xFFFF) {
+ grow(3);
+ buffer[cursor - Platform.BYTE_ARRAY_OFFSET] = (byte) (0xE0 | (codePoint >> 12));
+ buffer[cursor + 1 - Platform.BYTE_ARRAY_OFFSET] = (byte) (0x80 | ((codePoint >> 6) & 0x3F));
+ buffer[cursor + 2 - Platform.BYTE_ARRAY_OFFSET] = (byte) (0x80 | (codePoint & 0x3F));
+ cursor += 3;
+ } else if (codePoint <= 0x10FFFF) {
+ grow(4);
+ buffer[cursor - Platform.BYTE_ARRAY_OFFSET] = (byte) (0xF0 | (codePoint >> 18));
+ buffer[cursor + 1 - Platform.BYTE_ARRAY_OFFSET] = (byte) (0x80 | ((codePoint >> 12) & 0x3F));
+ buffer[cursor + 2 - Platform.BYTE_ARRAY_OFFSET] = (byte) (0x80 | ((codePoint >> 6) & 0x3F));
+ buffer[cursor + 3 - Platform.BYTE_ARRAY_OFFSET] = (byte) (0x80 | (codePoint & 0x3F));
+ cursor += 4;
+ } else {
+ throw new IllegalArgumentException("Invalid Unicode codePoint: " + codePoint);
+ }
+ }
+
}
diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
index e6bddb12da56b..caf8461b0b5d6 100644
--- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
+++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java
@@ -83,6 +83,20 @@ private enum UTF8StringValidity {
*/
private volatile int numBytesValid = -1;
+ /**
+ * The ASCII-ness of a UTF8String can be cached to avoid repeated checks, because that
+ * operation requires a full string scan. Full ASCII strings contain only ASCII characters.
+ */
+ private enum IsFullAscii {
+ UNKNOWN, FULL_ASCII, NOT_ASCII
+ }
+
+ /**
+ * Internal flag to indicate whether the string is full ASCII or not. Initially, the ASCII-ness
+ * is UNKNOWN, and will be set to either FULL_ASCII or NOT_ASCII after the first check.
+ */
+ private volatile IsFullAscii isFullAscii = IsFullAscii.UNKNOWN;
+
public Object getBaseObject() { return base; }
public long getBaseOffset() { return offset; }
@@ -127,6 +141,7 @@ private enum UTF8StringValidity {
private static final UTF8String COMMA_UTF8 = UTF8String.fromString(",");
public static final UTF8String EMPTY_UTF8 = UTF8String.fromString("");
public static final UTF8String ZERO_UTF8 = UTF8String.fromString("0");
+ public static final UTF8String SPACE_UTF8 = UTF8String.fromString(" ");
/**
@@ -788,12 +803,19 @@ public UTF8String toLowerCase() {
}
public boolean isFullAscii() {
+ if (isFullAscii == IsFullAscii.UNKNOWN) {
+ isFullAscii = getIsFullAscii();
+ }
+ return isFullAscii == IsFullAscii.FULL_ASCII;
+ }
+
+ private IsFullAscii getIsFullAscii() {
for (var i = 0; i < numBytes; i++) {
if (getByte(i) < 0) {
- return false;
+ return IsFullAscii.NOT_ASCII;
}
}
- return true;
+ return IsFullAscii.FULL_ASCII;
}
private UTF8String toLowerCaseSlow() {
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java
index b082ab21944f7..a696da8cf45b8 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/CollationSupportSuite.java
@@ -40,165 +40,224 @@ public class CollationSupportSuite {
{"UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI"};
/**
- * Collation-aware UTF8String comparison.
+ * Collation-aware UTF8String comparison and equality check.
*/
- private void assertStringCompare(String s1, String s2, String collationName, int expected)
+ private void assertCompare(String s1, String s2, String collationName, int expected)
throws SparkException {
UTF8String l = UTF8String.fromString(s1);
UTF8String r = UTF8String.fromString(s2);
+ // Test the comparator, which is the most general way to compare strings with collations.
int compare = CollationFactory.fetchCollation(collationName).comparator.compare(l, r);
assertEquals(Integer.signum(expected), Integer.signum(compare));
+ // Test the equals function, which may be faster than the comparator for equality checks.
+ boolean equals = CollationFactory.fetchCollation(collationName).equalsFunction.apply(l ,r);
+ assertEquals(expected == 0, equals);
}
@Test
public void testCompare() throws SparkException {
for (String collationName: testSupportedCollations) {
- // Edge cases
- assertStringCompare("", "", collationName, 0);
- assertStringCompare("a", "", collationName, 1);
- assertStringCompare("", "a", collationName, -1);
- // Basic tests
- assertStringCompare("a", "a", collationName, 0);
- assertStringCompare("a", "b", collationName, -1);
- assertStringCompare("b", "a", collationName, 1);
- assertStringCompare("A", "A", collationName, 0);
- assertStringCompare("A", "B", collationName, -1);
- assertStringCompare("B", "A", collationName, 1);
- assertStringCompare("aa", "a", collationName, 1);
- assertStringCompare("b", "bb", collationName, -1);
- assertStringCompare("abc", "a", collationName, 1);
- assertStringCompare("abc", "b", collationName, -1);
- assertStringCompare("abc", "ab", collationName, 1);
- assertStringCompare("abc", "abc", collationName, 0);
- // ASCII strings
- assertStringCompare("aaaa", "aaa", collationName, 1);
- assertStringCompare("hello", "world", collationName, -1);
- assertStringCompare("Spark", "Spark", collationName, 0);
- // Non-ASCII strings
- assertStringCompare("ü", "ü", collationName, 0);
- assertStringCompare("ü", "", collationName, 1);
- assertStringCompare("", "ü", collationName, -1);
- assertStringCompare("äü", "äü", collationName, 0);
- assertStringCompare("äxx", "äx", collationName, 1);
- assertStringCompare("a", "ä", collationName, -1);
+ // Empty strings.
+ assertCompare("", "", collationName, 0);
+ assertCompare("a", "", collationName, 1);
+ assertCompare("", "a", collationName, -1);
+ // Basic tests.
+ assertCompare("a", "a", collationName, 0);
+ assertCompare("a", "b", collationName, -1);
+ assertCompare("b", "a", collationName, 1);
+ assertCompare("A", "A", collationName, 0);
+ assertCompare("A", "B", collationName, -1);
+ assertCompare("B", "A", collationName, 1);
+ assertCompare("aa", "a", collationName, 1);
+ assertCompare("b", "bb", collationName, -1);
+ assertCompare("abc", "a", collationName, 1);
+ assertCompare("abc", "b", collationName, -1);
+ assertCompare("abc", "ab", collationName, 1);
+ assertCompare("abc", "abc", collationName, 0);
+ assertCompare("aaaa", "aaa", collationName, 1);
+ assertCompare("hello", "world", collationName, -1);
+ assertCompare("Spark", "Spark", collationName, 0);
+ assertCompare("ü", "ü", collationName, 0);
+ assertCompare("ü", "", collationName, 1);
+ assertCompare("", "ü", collationName, -1);
+ assertCompare("äü", "äü", collationName, 0);
+ assertCompare("äxx", "äx", collationName, 1);
+ assertCompare("a", "ä", collationName, -1);
}
- // Non-ASCII strings
- assertStringCompare("äü", "bü", "UTF8_BINARY", 1);
- assertStringCompare("bxx", "bü", "UTF8_BINARY", -1);
- assertStringCompare("äü", "bü", "UTF8_LCASE", 1);
- assertStringCompare("bxx", "bü", "UTF8_LCASE", -1);
- assertStringCompare("äü", "bü", "UNICODE", -1);
- assertStringCompare("bxx", "bü", "UNICODE", 1);
- assertStringCompare("äü", "bü", "UNICODE_CI", -1);
- assertStringCompare("bxx", "bü", "UNICODE_CI", 1);
- // Case variation
- assertStringCompare("AbCd", "aBcD", "UTF8_BINARY", -1);
- assertStringCompare("ABCD", "abcd", "UTF8_LCASE", 0);
- assertStringCompare("AbcD", "aBCd", "UNICODE", 1);
- assertStringCompare("abcd", "ABCD", "UNICODE_CI", 0);
- // Accent variation
- assertStringCompare("aBćD", "ABĆD", "UTF8_BINARY", 1);
- assertStringCompare("AbCδ", "ABCΔ", "UTF8_LCASE", 0);
- assertStringCompare("äBCd", "ÄBCD", "UNICODE", -1);
- assertStringCompare("Ab́cD", "AB́CD", "UNICODE_CI", 0);
- // Case-variable character length
- assertStringCompare("i\u0307", "İ", "UTF8_BINARY", -1);
- assertStringCompare("İ", "i\u0307", "UTF8_BINARY", 1);
- assertStringCompare("i\u0307", "İ", "UTF8_LCASE", 0);
- assertStringCompare("İ", "i\u0307", "UTF8_LCASE", 0);
- assertStringCompare("i\u0307", "İ", "UNICODE", -1);
- assertStringCompare("İ", "i\u0307", "UNICODE", 1);
- assertStringCompare("i\u0307", "İ", "UNICODE_CI", 0);
- assertStringCompare("İ", "i\u0307", "UNICODE_CI", 0);
- assertStringCompare("i\u0307İ", "i\u0307İ", "UTF8_LCASE", 0);
- assertStringCompare("i\u0307İ", "İi\u0307", "UTF8_LCASE", 0);
- assertStringCompare("İi\u0307", "i\u0307İ", "UTF8_LCASE", 0);
- assertStringCompare("İi\u0307", "İi\u0307", "UTF8_LCASE", 0);
- assertStringCompare("i\u0307İ", "i\u0307İ", "UNICODE_CI", 0);
- assertStringCompare("i\u0307İ", "İi\u0307", "UNICODE_CI", 0);
- assertStringCompare("İi\u0307", "i\u0307İ", "UNICODE_CI", 0);
- assertStringCompare("İi\u0307", "İi\u0307", "UNICODE_CI", 0);
- // Conditional case mapping
- assertStringCompare("ς", "σ", "UTF8_BINARY", -1);
- assertStringCompare("ς", "Σ", "UTF8_BINARY", 1);
- assertStringCompare("σ", "Σ", "UTF8_BINARY", 1);
- assertStringCompare("ς", "σ", "UTF8_LCASE", 0);
- assertStringCompare("ς", "Σ", "UTF8_LCASE", 0);
- assertStringCompare("σ", "Σ", "UTF8_LCASE", 0);
- assertStringCompare("ς", "σ", "UNICODE", 1);
- assertStringCompare("ς", "Σ", "UNICODE", 1);
- assertStringCompare("σ", "Σ", "UNICODE", -1);
- assertStringCompare("ς", "σ", "UNICODE_CI", 0);
- assertStringCompare("ς", "Σ", "UNICODE_CI", 0);
- assertStringCompare("σ", "Σ", "UNICODE_CI", 0);
+ // Advanced tests.
+ assertCompare("äü", "bü", "UTF8_BINARY", 1);
+ assertCompare("bxx", "bü", "UTF8_BINARY", -1);
+ assertCompare("äü", "bü", "UTF8_LCASE", 1);
+ assertCompare("bxx", "bü", "UTF8_LCASE", -1);
+ assertCompare("äü", "bü", "UNICODE", -1);
+ assertCompare("bxx", "bü", "UNICODE", 1);
+ assertCompare("äü", "bü", "UNICODE_CI", -1);
+ assertCompare("bxx", "bü", "UNICODE_CI", 1);
+ assertCompare("cČć", "ČćC", "SR_CI_AI", 0);
+ // Case variation.
+ assertCompare("AbCd", "aBcD", "UTF8_BINARY", -1);
+ assertCompare("ABCD", "abcd", "UTF8_LCASE", 0);
+ assertCompare("AbcD", "aBCd", "UNICODE", 1);
+ assertCompare("abcd", "ABCD", "UNICODE_CI", 0);
+ // Accent variation.
+ assertCompare("aBćD", "ABĆD", "UTF8_BINARY", 1);
+ assertCompare("AbCδ", "ABCΔ", "UTF8_LCASE", 0);
+ assertCompare("äBCd", "ÄBCD", "UNICODE", -1);
+ assertCompare("Ab́cD", "AB́CD", "UNICODE_CI", 0);
+ assertCompare("ÈÉÊË", "EeEe", "AF_CI_AI", 0);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertCompare("i\u0307", "İ", "UTF8_BINARY", -1);
+ assertCompare("İ", "i\u0307", "UTF8_BINARY", 1);
+ assertCompare("i\u0307", "İ", "UTF8_LCASE", 0);
+ assertCompare("İ", "i\u0307", "UTF8_LCASE", 0);
+ assertCompare("i\u0307", "İ", "UNICODE", -1);
+ assertCompare("İ", "i\u0307", "UNICODE", 1);
+ assertCompare("i\u0307", "İ", "UNICODE_CI", 0);
+ assertCompare("İ", "i\u0307", "UNICODE_CI", 0);
+ assertCompare("i\u0307İ", "i\u0307İ", "UTF8_LCASE", 0);
+ assertCompare("i\u0307İ", "İi\u0307", "UTF8_LCASE", 0);
+ assertCompare("İi\u0307", "i\u0307İ", "UTF8_LCASE", 0);
+ assertCompare("İi\u0307", "İi\u0307", "UTF8_LCASE", 0);
+ assertCompare("i\u0307İ", "i\u0307İ", "UNICODE_CI", 0);
+ assertCompare("i\u0307İ", "İi\u0307", "UNICODE_CI", 0);
+ assertCompare("İi\u0307", "i\u0307İ", "UNICODE_CI", 0);
+ assertCompare("İi\u0307", "İi\u0307", "UNICODE_CI", 0);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertCompare("ς", "σ", "UTF8_BINARY", -1);
+ assertCompare("ς", "Σ", "UTF8_BINARY", 1);
+ assertCompare("σ", "Σ", "UTF8_BINARY", 1);
+ assertCompare("ς", "σ", "UTF8_LCASE", 0);
+ assertCompare("ς", "Σ", "UTF8_LCASE", 0);
+ assertCompare("σ", "Σ", "UTF8_LCASE", 0);
+ assertCompare("ς", "σ", "UNICODE", 1);
+ assertCompare("ς", "Σ", "UNICODE", 1);
+ assertCompare("σ", "Σ", "UNICODE", -1);
+ assertCompare("ς", "σ", "UNICODE_CI", 0);
+ assertCompare("ς", "Σ", "UNICODE_CI", 0);
+ assertCompare("σ", "Σ", "UNICODE_CI", 0);
+ // Surrogate pairs.
+ assertCompare("a🙃b🙃c", "aaaaa", "UTF8_BINARY", 1);
+ assertCompare("a🙃b🙃c", "aaaaa", "UTF8_LCASE", 1);
+ assertCompare("a🙃b🙃c", "aaaaa", "UNICODE", -1); // != UTF8_BINARY
+ assertCompare("a🙃b🙃c", "aaaaa", "UNICODE_CI", -1); // != UTF8_LCASE
+ assertCompare("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", 0);
+ assertCompare("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", 0);
+ assertCompare("a🙃b🙃c", "a🙃b🙃c", "UNICODE", 0);
+ assertCompare("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", 0);
+ assertCompare("a🙃b🙃c", "a🙃b🙃d", "UTF8_BINARY", -1);
+ assertCompare("a🙃b🙃c", "a🙃b🙃d", "UTF8_LCASE", -1);
+ assertCompare("a🙃b🙃c", "a🙃b🙃d", "UNICODE", -1);
+ assertCompare("a🙃b🙃c", "a🙃b🙃d", "UNICODE_CI", -1);
// Maximum code point.
int maxCodePoint = Character.MAX_CODE_POINT;
String maxCodePointStr = new String(Character.toChars(maxCodePoint));
for (int i = 0; i < maxCodePoint && Character.isValidCodePoint(i); ++i) {
- assertStringCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_BINARY", -1);
- assertStringCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_LCASE", -1);
+ assertCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_BINARY", -1);
+ assertCompare(new String(Character.toChars(i)), maxCodePointStr, "UTF8_LCASE", -1);
}
// Minimum code point.
int minCodePoint = Character.MIN_CODE_POINT;
String minCodePointStr = new String(Character.toChars(minCodePoint));
for (int i = minCodePoint + 1; i <= maxCodePoint && Character.isValidCodePoint(i); ++i) {
- assertStringCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_BINARY", 1);
- assertStringCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_LCASE", 1);
+ assertCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_BINARY", 1);
+ assertCompare(new String(Character.toChars(i)), minCodePointStr, "UTF8_LCASE", 1);
}
}
- private void assertLowerCaseCodePoints(UTF8String target, UTF8String expected,
- Boolean useCodePoints) {
+ /**
+ * Collation-aware UTF8String lowercase conversion. Asserts that lowercasing {@code string} yields {@code expected}.
+ */
+
+ private void assertLowerCaseCodePoints(String string, String expected, Boolean useCodePoints) {
+ UTF8String str = UTF8String.fromString(string); // convert plain Java String to UTF8String under test
 if (useCodePoints) {
- assertEquals(expected, CollationAwareUTF8String.lowerCaseCodePoints(str);
+ UTF8String result = CollationAwareUTF8String.lowerCaseCodePoints(str); // code-point-level lowering (differs from toLowerCase for e.g. Greek final sigma)
+ assertEquals(UTF8String.fromString(expected), result);
 } else {
- assertEquals(expected, target.toLowerCase());
+ UTF8String result = str.toLowerCase(); // standard UTF8String lowercasing
+ assertEquals(UTF8String.fromString(expected), result);
 }
 }
@Test
public void testLowerCaseCodePoints() {
- // Edge cases
- assertLowerCaseCodePoints(UTF8String.fromString(""), UTF8String.fromString(""), false);
- assertLowerCaseCodePoints(UTF8String.fromString(""), UTF8String.fromString(""), true);
- // Basic tests
- assertLowerCaseCodePoints(UTF8String.fromString("abcd"), UTF8String.fromString("abcd"), false);
- assertLowerCaseCodePoints(UTF8String.fromString("AbCd"), UTF8String.fromString("abcd"), false);
- assertLowerCaseCodePoints(UTF8String.fromString("abcd"), UTF8String.fromString("abcd"), true);
- assertLowerCaseCodePoints(UTF8String.fromString("aBcD"), UTF8String.fromString("abcd"), true);
- // Accent variation
- assertLowerCaseCodePoints(UTF8String.fromString("AbĆd"), UTF8String.fromString("abćd"), false);
- assertLowerCaseCodePoints(UTF8String.fromString("aBcΔ"), UTF8String.fromString("abcδ"), true);
- // Case-variable character length
- assertLowerCaseCodePoints(
- UTF8String.fromString("İoDiNe"), UTF8String.fromString("i̇odine"), false);
- assertLowerCaseCodePoints(
- UTF8String.fromString("Abi̇o12"), UTF8String.fromString("abi̇o12"), false);
- assertLowerCaseCodePoints(
- UTF8String.fromString("İodInE"), UTF8String.fromString("i̇odine"), true);
- assertLowerCaseCodePoints(
- UTF8String.fromString("aBi̇o12"), UTF8String.fromString("abi̇o12"), true);
- // Conditional case mapping
- assertLowerCaseCodePoints(
- UTF8String.fromString("ΘΑΛΑΣΣΙΝΟΣ"), UTF8String.fromString("θαλασσινος"), false);
- assertLowerCaseCodePoints(
- UTF8String.fromString("ΘΑΛΑΣΣΙΝΟΣ"), UTF8String.fromString("θαλασσινοσ"), true);
- // Surrogate pairs are treated as invalid UTF8 sequences
- assertLowerCaseCodePoints(UTF8String.fromBytes(new byte[]
- {(byte) 0xED, (byte) 0xA0, (byte) 0x80, (byte) 0xED, (byte) 0xB0, (byte) 0x80}),
- UTF8String.fromString("\uFFFD\uFFFD"), false);
- assertLowerCaseCodePoints(UTF8String.fromBytes(new byte[]
- {(byte) 0xED, (byte) 0xA0, (byte) 0x80, (byte) 0xED, (byte) 0xB0, (byte) 0x80}),
- UTF8String.fromString("\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD"), true); // != Java toLowerCase
+ // Empty strings.
+ assertLowerCaseCodePoints("", "", false);
+ assertLowerCaseCodePoints("", "", true);
+ // Basic tests.
+ assertLowerCaseCodePoints("xyz", "xyz", false);
+ assertLowerCaseCodePoints("xyz", "xyz", true);
+ assertLowerCaseCodePoints("abcd", "abcd", false);
+ assertLowerCaseCodePoints("abcd", "abcd", true);
+ // Advanced tests.
+ assertLowerCaseCodePoints("你好", "你好", false);
+ assertLowerCaseCodePoints("你好", "你好", true);
+ assertLowerCaseCodePoints("Γειά", "γειά", false);
+ assertLowerCaseCodePoints("Γειά", "γειά", true);
+ assertLowerCaseCodePoints("Здраво", "здраво", false);
+ assertLowerCaseCodePoints("Здраво", "здраво", true);
+ // Case variation.
+ assertLowerCaseCodePoints("xYz", "xyz", false);
+ assertLowerCaseCodePoints("xYz", "xyz", true);
+ assertLowerCaseCodePoints("AbCd", "abcd", false);
+ assertLowerCaseCodePoints("aBcD", "abcd", true);
+ // Accent variation.
+ assertLowerCaseCodePoints("äbć", "äbć", false);
+ assertLowerCaseCodePoints("äbć", "äbć", true);
+ assertLowerCaseCodePoints("AbĆd", "abćd", false);
+ assertLowerCaseCodePoints("aBcΔ", "abcδ", true);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertLowerCaseCodePoints("i\u0307", "i\u0307", false);
+ assertLowerCaseCodePoints("i\u0307", "i\u0307", true);
+ assertLowerCaseCodePoints("I\u0307", "i\u0307", false);
+ assertLowerCaseCodePoints("I\u0307", "i\u0307", true);
+ assertLowerCaseCodePoints("İ", "i\u0307", false); // U+0130 lowercases to "i" + combining dot above (one code point -> two)
+ assertLowerCaseCodePoints("İ", "i\u0307", true);
+ assertLowerCaseCodePoints("İİİ", "i\u0307i\u0307i\u0307", false);
+ assertLowerCaseCodePoints("İİİ", "i\u0307i\u0307i\u0307", true);
+ assertLowerCaseCodePoints("İiIi\u0307", "i\u0307iii\u0307", false);
+ assertLowerCaseCodePoints("İiIi\u0307", "i\u0307iii\u0307", true);
+ assertLowerCaseCodePoints("İoDiNe", "i\u0307odine", false);
+ assertLowerCaseCodePoints("İodInE", "i\u0307odine", true);
+ assertLowerCaseCodePoints("Abi\u0307o12", "abi\u0307o12", false);
+ assertLowerCaseCodePoints("aBi\u0307o12", "abi\u0307o12", true);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertLowerCaseCodePoints("ς", "ς", false); // toLowerCase keeps final sigma as-is
+ assertLowerCaseCodePoints("ς", "σ", true); // code-point mode maps final sigma to σ (!= toLowerCase)
+ assertLowerCaseCodePoints("σ", "σ", false);
+ assertLowerCaseCodePoints("σ", "σ", true);
+ assertLowerCaseCodePoints("Σ", "σ", false);
+ assertLowerCaseCodePoints("Σ", "σ", true);
+ assertLowerCaseCodePoints("ςΑΛΑΤΑ", "ςαλατα", false);
+ assertLowerCaseCodePoints("ςΑΛΑΤΑ", "σαλατα", true);
+ assertLowerCaseCodePoints("σΑΛΑΤΑ", "σαλατα", false);
+ assertLowerCaseCodePoints("σΑΛΑΤΑ", "σαλατα", true);
+ assertLowerCaseCodePoints("ΣΑΛΑΤΑ", "σαλατα", false);
+ assertLowerCaseCodePoints("ΣΑΛΑΤΑ", "σαλατα", true);
+ assertLowerCaseCodePoints("ΘΑΛΑΣΣΙΝΟς", "θαλασσινος", false);
+ assertLowerCaseCodePoints("ΘΑΛΑΣΣΙΝΟς", "θαλασσινοσ", true);
+ assertLowerCaseCodePoints("ΘΑΛΑΣΣΙΝΟσ", "θαλασσινοσ", false);
+ assertLowerCaseCodePoints("ΘΑΛΑΣΣΙΝΟσ", "θαλασσινοσ", true);
+ assertLowerCaseCodePoints("ΘΑΛΑΣΣΙΝΟΣ", "θαλασσινος", false); // word-final capital sigma lowers to ς in toLowerCase
+ assertLowerCaseCodePoints("ΘΑΛΑΣΣΙΝΟΣ", "θαλασσινοσ", true); // but to σ in code-point mode (context-free)
+ // Surrogate pairs.
+ assertLowerCaseCodePoints("a🙃b🙃c", "a🙃b🙃c", false);
+ assertLowerCaseCodePoints("a🙃b🙃c", "a🙃b🙃c", true);
+ assertLowerCaseCodePoints("😀😆😃😄😄😆", "😀😆😃😄😄😆", false);
+ assertLowerCaseCodePoints("😀😆😃😄😄😆", "😀😆😃😄😄😆", true);
+ assertLowerCaseCodePoints("𐐅", "𐐭", false); // supplementary-plane letter (Deseret) with a real lowercase mapping
+ assertLowerCaseCodePoints("𐐅", "𐐭", true);
+ assertLowerCaseCodePoints("𝔸", "𝔸", false); // U+1D538 maps to itself (no lowercase form)
+ assertLowerCaseCodePoints("𝔸", "𝔸", true);
}
/**
- * Collation-aware string expressions.
+ * Verify the behaviour of the `Contains` collation support class.
*/
- private void assertContains(String pattern, String target, String collationName, boolean expected)
- throws SparkException {
+ private void assertContains(String pattern, String target, String collationName,
+ boolean expected) throws SparkException {
UTF8String l = UTF8String.fromString(pattern);
UTF8String r = UTF8String.fromString(target);
int collationId = CollationFactory.collationNameToId(collationName);
@@ -207,20 +266,42 @@ private void assertContains(String pattern, String target, String collationName,
@Test
public void testContains() throws SparkException {
- // Edge cases
- assertContains("", "", "UTF8_BINARY", true);
- assertContains("c", "", "UTF8_BINARY", true);
- assertContains("", "c", "UTF8_BINARY", false);
- assertContains("", "", "UNICODE", true);
- assertContains("c", "", "UNICODE", true);
- assertContains("", "c", "UNICODE", false);
- assertContains("", "", "UTF8_LCASE", true);
- assertContains("c", "", "UTF8_LCASE", true);
- assertContains("", "c", "UTF8_LCASE", false);
- assertContains("", "", "UNICODE_CI", true);
- assertContains("c", "", "UNICODE_CI", true);
- assertContains("", "c", "UNICODE_CI", false);
- // Basic tests
+ for (String collationName: testSupportedCollations) {
+ // Empty strings.
+ assertContains("", "", collationName, true);
+ assertContains("a", "", collationName, true);
+ assertContains("", "x", collationName, false);
+ // Basic tests.
+ assertContains("a", "a", collationName, true);
+ assertContains("_a_", "_a_", collationName, true);
+ assertContains("_a_", "a", collationName, true);
+ assertContains("%a%", "%a%", collationName, true);
+ assertContains("%a%", "a", collationName, true);
+ assertContains("*a*", "*a*", collationName, true);
+ assertContains("*a*", "a", collationName, true);
+ assertContains("?a?", "?a?", collationName, true);
+ assertContains("?a?", "a", collationName, true);
+ assertContains("/a/", "/a/", collationName, true);
+ assertContains("/a/", "a", collationName, true);
+ assertContains("abcde", "xyz", collationName, false);
+ assertContains("abcde", "bcd", collationName, true);
+ assertContains("abcde", "abc", collationName, true);
+ assertContains("abcde", "cde", collationName, true);
+ assertContains("abcde", "abcde", collationName, true);
+ assertContains("你好", "x", collationName, false);
+ assertContains("你好", "你", collationName, true);
+ assertContains("你好", "好", collationName, true);
+ assertContains("你好", "你好", collationName, true);
+ assertContains("Γειά", "x", collationName, false);
+ assertContains("Γειά", "ειά", collationName, true);
+ assertContains("Γειά", "Γει", collationName, true);
+ assertContains("Γειά", "Γειά", collationName, true);
+ assertContains("Здраво", "x", collationName, false);
+ assertContains("Здраво", "драво", collationName, true);
+ assertContains("Здраво", "Здрав", collationName, true);
+ assertContains("Здраво", "Здраво", collationName, true);
+ }
+ // Advanced tests.
assertContains("abcde", "bcd", "UTF8_BINARY", true);
assertContains("abcde", "bde", "UTF8_BINARY", false);
assertContains("abcde", "fgh", "UTF8_BINARY", false);
@@ -233,25 +314,6 @@ public void testContains() throws SparkException {
assertContains("abcde", "c", "UNICODE_CI", true);
assertContains("abcde", "bCD", "UNICODE_CI", true);
assertContains("abcde", "123", "UNICODE_CI", false);
- // Case variation
- assertContains("aBcDe", "bcd", "UTF8_BINARY", false);
- assertContains("aBcDe", "BcD", "UTF8_BINARY", true);
- assertContains("aBcDe", "abcde", "UNICODE", false);
- assertContains("aBcDe", "aBcDe", "UNICODE", true);
- assertContains("aBcDe", "bcd", "UTF8_LCASE", true);
- assertContains("aBcDe", "BCD", "UTF8_LCASE", true);
- assertContains("aBcDe", "abcde", "UNICODE_CI", true);
- assertContains("aBcDe", "AbCdE", "UNICODE_CI", true);
- // Accent variation
- assertContains("aBcDe", "bćd", "UTF8_BINARY", false);
- assertContains("aBcDe", "BćD", "UTF8_BINARY", false);
- assertContains("aBcDe", "abćde", "UNICODE", false);
- assertContains("aBcDe", "aBćDe", "UNICODE", false);
- assertContains("aBcDe", "bćd", "UTF8_LCASE", false);
- assertContains("aBcDe", "BĆD", "UTF8_LCASE", false);
- assertContains("aBcDe", "abćde", "UNICODE_CI", false);
- assertContains("aBcDe", "AbĆdE", "UNICODE_CI", false);
- // Variable byte length characters
assertContains("ab世De", "b世D", "UTF8_BINARY", true);
assertContains("ab世De", "B世d", "UTF8_BINARY", false);
assertContains("äbćδe", "bćδ", "UTF8_BINARY", true);
@@ -268,45 +330,181 @@ public void testContains() throws SparkException {
assertContains("ab世De", "AB世dE", "UNICODE_CI", true);
assertContains("äbćδe", "ÄbćδE", "UNICODE_CI", true);
assertContains("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false);
- // Characters with the same binary lowercase representation
assertContains("The Kelvin.", "Kelvin", "UTF8_LCASE", true);
assertContains("The Kelvin.", "Kelvin", "UTF8_LCASE", true);
assertContains("The KKelvin.", "KKelvin", "UTF8_LCASE", true);
assertContains("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true);
assertContains("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true);
assertContains("The KKelvin.", "KKelvin,", "UTF8_LCASE", false);
- // Case-variable character length
- assertContains("i̇", "i", "UNICODE_CI", false);
- assertContains("i̇", "\u0307", "UNICODE_CI", false);
- assertContains("i̇", "İ", "UNICODE_CI", true);
+ assertContains("abčćd", "ABCCD", "SR_CI_AI", true);
+ // Case variation.
+ assertContains("aBcDe", "bcd", "UTF8_BINARY", false);
+ assertContains("aBcDe", "BcD", "UTF8_BINARY", true);
+ assertContains("aBcDe", "abcde", "UNICODE", false);
+ assertContains("aBcDe", "aBcDe", "UNICODE", true);
+ assertContains("aBcDe", "bcd", "UTF8_LCASE", true);
+ assertContains("aBcDe", "BCD", "UTF8_LCASE", true);
+ assertContains("aBcDe", "abcde", "UNICODE_CI", true);
+ assertContains("aBcDe", "AbCdE", "UNICODE_CI", true);
+ // Accent variation.
+ assertContains("aBcDe", "bćd", "UTF8_BINARY", false);
+ assertContains("aBcDe", "BćD", "UTF8_BINARY", false);
+ assertContains("aBcDe", "abćde", "UNICODE", false);
+ assertContains("aBcDe", "aBćDe", "UNICODE", false);
+ assertContains("aBcDe", "bćd", "UTF8_LCASE", false);
+ assertContains("aBcDe", "BĆD", "UTF8_LCASE", false);
+ assertContains("aBcDe", "abćde", "UNICODE_CI", false);
+ assertContains("aBcDe", "AbĆdE", "UNICODE_CI", false);
+ assertContains("abEEE", "Bèêë", "AF_CI_AI", true);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertContains("i\u0307", "i", "UNICODE_CI", false);
+ assertContains("i\u0307", "\u0307", "UNICODE_CI", false);
+ assertContains("i\u0307", "İ", "UNICODE_CI", true);
assertContains("İ", "i", "UNICODE_CI", false);
assertContains("adi̇os", "io", "UNICODE_CI", false);
assertContains("adi̇os", "Io", "UNICODE_CI", false);
- assertContains("adi̇os", "i̇o", "UNICODE_CI", true);
+ assertContains("adi̇os", "i\u0307o", "UNICODE_CI", true);
assertContains("adi̇os", "İo", "UNICODE_CI", true);
assertContains("adİos", "io", "UNICODE_CI", false);
assertContains("adİos", "Io", "UNICODE_CI", false);
- assertContains("adİos", "i̇o", "UNICODE_CI", true);
+ assertContains("adİos", "i\u0307o", "UNICODE_CI", true);
assertContains("adİos", "İo", "UNICODE_CI", true);
- assertContains("i̇", "i", "UTF8_LCASE", true); // != UNICODE_CI
+ assertContains("i\u0307", "i", "UTF8_LCASE", true); // != UNICODE_CI
assertContains("İ", "\u0307", "UTF8_LCASE", false);
assertContains("İ", "i", "UTF8_LCASE", false);
- assertContains("i̇", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI
- assertContains("i̇", "İ", "UTF8_LCASE", true);
+ assertContains("i\u0307", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI
+ assertContains("i\u0307", "İ", "UTF8_LCASE", true);
assertContains("İ", "i", "UTF8_LCASE", false);
assertContains("adi̇os", "io", "UTF8_LCASE", false);
assertContains("adi̇os", "Io", "UTF8_LCASE", false);
- assertContains("adi̇os", "i̇o", "UTF8_LCASE", true);
+ assertContains("adi̇os", "i\u0307o", "UTF8_LCASE", true);
assertContains("adi̇os", "İo", "UTF8_LCASE", true);
assertContains("adİos", "io", "UTF8_LCASE", false);
assertContains("adİos", "Io", "UTF8_LCASE", false);
- assertContains("adİos", "i̇o", "UTF8_LCASE", true);
+ assertContains("adİos", "i\u0307o", "UTF8_LCASE", true);
assertContains("adİos", "İo", "UTF8_LCASE", true);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertContains("σ", "σ", "UTF8_BINARY", true);
+ assertContains("σ", "ς", "UTF8_BINARY", false);
+ assertContains("σ", "Σ", "UTF8_BINARY", false);
+ assertContains("ς", "σ", "UTF8_BINARY", false);
+ assertContains("ς", "ς", "UTF8_BINARY", true);
+ assertContains("ς", "Σ", "UTF8_BINARY", false);
+ assertContains("Σ", "σ", "UTF8_BINARY", false);
+ assertContains("Σ", "ς", "UTF8_BINARY", false);
+ assertContains("Σ", "Σ", "UTF8_BINARY", true);
+ assertContains("σ", "σ", "UTF8_LCASE", true);
+ assertContains("σ", "ς", "UTF8_LCASE", true);
+ assertContains("σ", "Σ", "UTF8_LCASE", true);
+ assertContains("ς", "σ", "UTF8_LCASE", true);
+ assertContains("ς", "ς", "UTF8_LCASE", true);
+ assertContains("ς", "Σ", "UTF8_LCASE", true);
+ assertContains("Σ", "σ", "UTF8_LCASE", true);
+ assertContains("Σ", "ς", "UTF8_LCASE", true);
+ assertContains("Σ", "Σ", "UTF8_LCASE", true);
+ assertContains("σ", "σ", "UNICODE", true);
+ assertContains("σ", "ς", "UNICODE", false);
+ assertContains("σ", "Σ", "UNICODE", false);
+ assertContains("ς", "σ", "UNICODE", false);
+ assertContains("ς", "ς", "UNICODE", true);
+ assertContains("ς", "Σ", "UNICODE", false);
+ assertContains("Σ", "σ", "UNICODE", false);
+ assertContains("Σ", "ς", "UNICODE", false);
+ assertContains("Σ", "Σ", "UNICODE", true);
+ assertContains("σ", "σ", "UNICODE_CI", true);
+ assertContains("σ", "ς", "UNICODE_CI", true);
+ assertContains("σ", "Σ", "UNICODE_CI", true);
+ assertContains("ς", "σ", "UNICODE_CI", true);
+ assertContains("ς", "ς", "UNICODE_CI", true);
+ assertContains("ς", "Σ", "UNICODE_CI", true);
+ assertContains("Σ", "σ", "UNICODE_CI", true);
+ assertContains("Σ", "ς", "UNICODE_CI", true);
+ assertContains("Σ", "Σ", "UNICODE_CI", true);
+ assertContains("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", true);
+ assertContains("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false);
+ assertContains("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", true);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", false);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false);
+ assertContains("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", true);
+ assertContains("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", true);
+ assertContains("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", true);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", true);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", true);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", true);
+ assertContains("ΣΑΛΑΤΑ", "Σ", "UNICODE", true);
+ assertContains("ΣΑΛΑΤΑ", "σ", "UNICODE", false);
+ assertContains("ΣΑΛΑΤΑ", "ς", "UNICODE", false);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", true);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false);
+ assertContains("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", true);
+ assertContains("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", true);
+ assertContains("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", true);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", true);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", true);
+ assertContains("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", true);
+ // Surrogate pairs.
+ assertContains("a🙃b🙃c", "x", "UTF8_BINARY", false);
+ assertContains("a🙃b🙃c", "x", "UTF8_LCASE", false);
+ assertContains("a🙃b🙃c", "x", "UNICODE", false);
+ assertContains("a🙃b🙃c", "x", "UNICODE_CI", false);
+ assertContains("a🙃b🙃c", "b", "UTF8_BINARY", true);
+ assertContains("a🙃b🙃c", "b", "UTF8_LCASE", true);
+ assertContains("a🙃b🙃c", "b", "UNICODE", true);
+ assertContains("a🙃b🙃c", "b", "UNICODE_CI", true);
+ assertContains("a🙃b🙃c", "a🙃b", "UTF8_BINARY", true);
+ assertContains("a🙃b🙃c", "a🙃b", "UTF8_LCASE", true);
+ assertContains("a🙃b🙃c", "a🙃b", "UNICODE", true);
+ assertContains("a🙃b🙃c", "a🙃b", "UNICODE_CI", true);
+ assertContains("a🙃b🙃c", "b🙃c", "UTF8_BINARY", true);
+ assertContains("a🙃b🙃c", "b🙃c", "UTF8_LCASE", true);
+ assertContains("a🙃b🙃c", "b🙃c", "UNICODE", true);
+ assertContains("a🙃b🙃c", "b🙃c", "UNICODE_CI", true);
+ assertContains("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true);
+ assertContains("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true);
+ assertContains("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true);
+ assertContains("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true);
+ assertContains("😀😆😃😄", "😄😆", "UTF8_BINARY", false);
+ assertContains("😀😆😃😄", "😄😆", "UTF8_LCASE", false);
+ assertContains("😀😆😃😄", "😄😆", "UNICODE", false);
+ assertContains("😀😆😃😄", "😄😆", "UNICODE_CI", false);
+ assertContains("😀😆😃😄", "😆😃", "UTF8_BINARY", true);
+ assertContains("😀😆😃😄", "😆😃", "UTF8_LCASE", true);
+ assertContains("😀😆😃😄", "😆😃", "UNICODE", true);
+ assertContains("😀😆😃😄", "😆😃", "UNICODE_CI", true);
+ assertContains("😀😆😃😄", "😀😆", "UTF8_BINARY", true);
+ assertContains("😀😆😃😄", "😀😆", "UTF8_LCASE", true);
+ assertContains("😀😆😃😄", "😀😆", "UNICODE", true);
+ assertContains("😀😆😃😄", "😀😆", "UNICODE_CI", true);
+ assertContains("😀😆😃😄", "😃😄", "UTF8_BINARY", true);
+ assertContains("😀😆😃😄", "😃😄", "UTF8_LCASE", true);
+ assertContains("😀😆😃😄", "😃😄", "UNICODE", true);
+ assertContains("😀😆😃😄", "😃😄", "UNICODE_CI", true);
+ assertContains("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true);
+ assertContains("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", true);
+ assertContains("😀😆😃😄", "😀😆😃😄", "UNICODE", true);
+ assertContains("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true);
+ assertContains("𐐅", "𐐅", "UTF8_BINARY", true);
+ assertContains("𐐅", "𐐅", "UTF8_LCASE", true);
+ assertContains("𐐅", "𐐅", "UNICODE", true);
+ assertContains("𐐅", "𐐅", "UNICODE_CI", true);
+ assertContains("𐐅", "𐐭", "UTF8_BINARY", false);
+ assertContains("𐐅", "𐐭", "UTF8_LCASE", true);
+ assertContains("𐐅", "𐐭", "UNICODE", false);
+ assertContains("𐐅", "𐐭", "UNICODE_CI", true);
+ assertContains("𝔸", "𝔸", "UTF8_BINARY", true);
+ assertContains("𝔸", "𝔸", "UTF8_LCASE", true);
+ assertContains("𝔸", "𝔸", "UNICODE", true);
+ assertContains("𝔸", "𝔸", "UNICODE_CI", true);
}
- private void assertStartsWith(
- String pattern, String prefix, String collationName, boolean expected)
- throws SparkException {
+ /**
+ * Verify the behaviour of the `StartsWith` collation support class.
+ */
+
+ private void assertStartsWith(String pattern, String prefix, String collationName,
+ boolean expected) throws SparkException {
UTF8String l = UTF8String.fromString(pattern);
UTF8String r = UTF8String.fromString(prefix);
int collationId = CollationFactory.collationNameToId(collationName);
@@ -315,20 +513,42 @@ private void assertStartsWith(
@Test
public void testStartsWith() throws SparkException {
- // Edge cases
- assertStartsWith("", "", "UTF8_BINARY", true);
- assertStartsWith("c", "", "UTF8_BINARY", true);
- assertStartsWith("", "c", "UTF8_BINARY", false);
- assertStartsWith("", "", "UNICODE", true);
- assertStartsWith("c", "", "UNICODE", true);
- assertStartsWith("", "c", "UNICODE", false);
- assertStartsWith("", "", "UTF8_LCASE", true);
- assertStartsWith("c", "", "UTF8_LCASE", true);
- assertStartsWith("", "c", "UTF8_LCASE", false);
- assertStartsWith("", "", "UNICODE_CI", true);
- assertStartsWith("c", "", "UNICODE_CI", true);
- assertStartsWith("", "c", "UNICODE_CI", false);
- // Basic tests
+ for (String collationName: testSupportedCollations) {
+ // Empty strings.
+ assertStartsWith("", "", collationName, true);
+ assertStartsWith("a", "", collationName, true);
+ assertStartsWith("", "x", collationName, false);
+ // Basic tests.
+ assertStartsWith("a", "a", collationName, true);
+ assertStartsWith("_a_", "_a", collationName, true);
+ assertStartsWith("_a_", "a", collationName, false);
+ assertStartsWith("%a%", "%a", collationName, true);
+ assertStartsWith("%a%", "a", collationName, false);
+ assertStartsWith("*a*", "*a", collationName, true);
+ assertStartsWith("*a*", "a", collationName, false);
+ assertStartsWith("?a?", "?a", collationName, true);
+ assertStartsWith("?a?", "a", collationName, false);
+ assertStartsWith("/a/", "/a", collationName, true);
+ assertStartsWith("/a/", "a", collationName, false);
+ assertStartsWith("abcde", "xyz", collationName, false);
+ assertStartsWith("abcde", "bcd", collationName, false);
+ assertStartsWith("abcde", "abc", collationName, true);
+ assertStartsWith("abcde", "cde", collationName, false);
+ assertStartsWith("abcde", "abcde", collationName, true);
+ assertStartsWith("你好", "x", collationName, false);
+ assertStartsWith("你好", "你", collationName, true);
+ assertStartsWith("你好", "好", collationName, false);
+ assertStartsWith("你好", "你好", collationName, true);
+ assertStartsWith("Γειά", "x", collationName, false);
+ assertStartsWith("Γειά", "ειά", collationName, false);
+ assertStartsWith("Γειά", "Γει", collationName, true);
+ assertStartsWith("Γειά", "Γειά", collationName, true);
+ assertStartsWith("Здраво", "x", collationName, false);
+ assertStartsWith("Здраво", "драво", collationName, false);
+ assertStartsWith("Здраво", "Здрав", collationName, true);
+ assertStartsWith("Здраво", "Здраво", collationName, true);
+ }
+ // Advanced tests.
assertStartsWith("abcde", "abc", "UTF8_BINARY", true);
assertStartsWith("abcde", "abd", "UTF8_BINARY", false);
assertStartsWith("abcde", "fgh", "UTF8_BINARY", false);
@@ -342,25 +562,6 @@ public void testStartsWith() throws SparkException {
assertStartsWith("abcde", "aBC", "UNICODE_CI", true);
assertStartsWith("abcde", "bcd", "UNICODE_CI", false);
assertStartsWith("abcde", "123", "UNICODE_CI", false);
- // Case variation
- assertStartsWith("aBcDe", "abc", "UTF8_BINARY", false);
- assertStartsWith("aBcDe", "aBc", "UTF8_BINARY", true);
- assertStartsWith("aBcDe", "abcde", "UNICODE", false);
- assertStartsWith("aBcDe", "aBcDe", "UNICODE", true);
- assertStartsWith("aBcDe", "abc", "UTF8_LCASE", true);
- assertStartsWith("aBcDe", "ABC", "UTF8_LCASE", true);
- assertStartsWith("aBcDe", "abcde", "UNICODE_CI", true);
- assertStartsWith("aBcDe", "AbCdE", "UNICODE_CI", true);
- // Accent variation
- assertStartsWith("aBcDe", "abć", "UTF8_BINARY", false);
- assertStartsWith("aBcDe", "aBć", "UTF8_BINARY", false);
- assertStartsWith("aBcDe", "abćde", "UNICODE", false);
- assertStartsWith("aBcDe", "aBćDe", "UNICODE", false);
- assertStartsWith("aBcDe", "abć", "UTF8_LCASE", false);
- assertStartsWith("aBcDe", "ABĆ", "UTF8_LCASE", false);
- assertStartsWith("aBcDe", "abćde", "UNICODE_CI", false);
- assertStartsWith("aBcDe", "AbĆdE", "UNICODE_CI", false);
- // Variable byte length characters
assertStartsWith("ab世De", "ab世", "UTF8_BINARY", true);
assertStartsWith("ab世De", "aB世", "UTF8_BINARY", false);
assertStartsWith("äbćδe", "äbć", "UTF8_BINARY", true);
@@ -377,16 +578,38 @@ public void testStartsWith() throws SparkException {
assertStartsWith("ab世De", "AB世dE", "UNICODE_CI", true);
assertStartsWith("äbćδe", "ÄbćδE", "UNICODE_CI", true);
assertStartsWith("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false);
- // Characters with the same binary lowercase representation
assertStartsWith("Kelvin.", "Kelvin", "UTF8_LCASE", true);
assertStartsWith("Kelvin.", "Kelvin", "UTF8_LCASE", true);
assertStartsWith("KKelvin.", "KKelvin", "UTF8_LCASE", true);
assertStartsWith("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true);
assertStartsWith("2 Kelvin.", "2 Kelvin", "UTF8_LCASE", true);
assertStartsWith("KKelvin.", "KKelvin,", "UTF8_LCASE", false);
- // Case-variable character length
- assertStartsWith("i̇", "i", "UNICODE_CI", false);
- assertStartsWith("i̇", "İ", "UNICODE_CI", true);
+ assertStartsWith("Ћао", "Ца", "sr_Cyrl_CI_AI", false);
+ assertStartsWith("Ћао", "ћа", "sr_Cyrl_CI_AI", true);
+ assertStartsWith("Ćao", "Ca", "SR_CI", false);
+ assertStartsWith("Ćao", "Ca", "SR_CI_AI", true);
+ assertStartsWith("Ćao", "Ća", "SR", true);
+ // Case variation.
+ assertStartsWith("aBcDe", "abc", "UTF8_BINARY", false);
+ assertStartsWith("aBcDe", "aBc", "UTF8_BINARY", true);
+ assertStartsWith("aBcDe", "abcde", "UNICODE", false);
+ assertStartsWith("aBcDe", "aBcDe", "UNICODE", true);
+ assertStartsWith("aBcDe", "abc", "UTF8_LCASE", true);
+ assertStartsWith("aBcDe", "ABC", "UTF8_LCASE", true);
+ assertStartsWith("aBcDe", "abcde", "UNICODE_CI", true);
+ assertStartsWith("aBcDe", "AbCdE", "UNICODE_CI", true);
+ // Accent variation.
+ assertStartsWith("aBcDe", "abć", "UTF8_BINARY", false);
+ assertStartsWith("aBcDe", "aBć", "UTF8_BINARY", false);
+ assertStartsWith("aBcDe", "abćde", "UNICODE", false);
+ assertStartsWith("aBcDe", "aBćDe", "UNICODE", false);
+ assertStartsWith("aBcDe", "abć", "UTF8_LCASE", false);
+ assertStartsWith("aBcDe", "ABĆ", "UTF8_LCASE", false);
+ assertStartsWith("aBcDe", "abćde", "UNICODE_CI", false);
+ assertStartsWith("aBcDe", "AbĆdE", "UNICODE_CI", false);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertStartsWith("i\u0307", "i", "UNICODE_CI", false);
+ assertStartsWith("i\u0307", "İ", "UNICODE_CI", true);
assertStartsWith("İ", "i", "UNICODE_CI", false);
assertStartsWith("İİİ", "i̇i̇", "UNICODE_CI", true);
assertStartsWith("İİİ", "i̇i", "UNICODE_CI", false);
@@ -394,14 +617,14 @@ public void testStartsWith() throws SparkException {
assertStartsWith("i̇İi̇i̇", "İi̇İi", "UNICODE_CI", false);
assertStartsWith("i̇onic", "io", "UNICODE_CI", false);
assertStartsWith("i̇onic", "Io", "UNICODE_CI", false);
- assertStartsWith("i̇onic", "i̇o", "UNICODE_CI", true);
+ assertStartsWith("i̇onic", "i\u0307o", "UNICODE_CI", true);
assertStartsWith("i̇onic", "İo", "UNICODE_CI", true);
assertStartsWith("İonic", "io", "UNICODE_CI", false);
assertStartsWith("İonic", "Io", "UNICODE_CI", false);
- assertStartsWith("İonic", "i̇o", "UNICODE_CI", true);
+ assertStartsWith("İonic", "i\u0307o", "UNICODE_CI", true);
assertStartsWith("İonic", "İo", "UNICODE_CI", true);
- assertStartsWith("i̇", "i", "UTF8_LCASE", true); // != UNICODE_CI
- assertStartsWith("i̇", "İ", "UTF8_LCASE", true);
+ assertStartsWith("i\u0307", "i", "UTF8_LCASE", true); // != UNICODE_CI
+ assertStartsWith("i\u0307", "İ", "UTF8_LCASE", true);
assertStartsWith("İ", "i", "UTF8_LCASE", false);
assertStartsWith("İİİ", "i̇i̇", "UTF8_LCASE", true);
assertStartsWith("İİİ", "i̇i", "UTF8_LCASE", false);
@@ -409,16 +632,136 @@ public void testStartsWith() throws SparkException {
assertStartsWith("i̇İi̇i̇", "İi̇İi", "UTF8_LCASE", true); // != UNICODE_CI
assertStartsWith("i̇onic", "io", "UTF8_LCASE", false);
assertStartsWith("i̇onic", "Io", "UTF8_LCASE", false);
- assertStartsWith("i̇onic", "i̇o", "UTF8_LCASE", true);
+ assertStartsWith("i̇onic", "i\u0307o", "UTF8_LCASE", true);
assertStartsWith("i̇onic", "İo", "UTF8_LCASE", true);
assertStartsWith("İonic", "io", "UTF8_LCASE", false);
assertStartsWith("İonic", "Io", "UTF8_LCASE", false);
- assertStartsWith("İonic", "i̇o", "UTF8_LCASE", true);
+ assertStartsWith("İonic", "i\u0307o", "UTF8_LCASE", true);
assertStartsWith("İonic", "İo", "UTF8_LCASE", true);
+ assertStartsWith("oİ", "oİ", "UTF8_LCASE", true);
+ assertStartsWith("oİ", "oi̇", "UTF8_LCASE", true);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertStartsWith("σ", "σ", "UTF8_BINARY", true);
+ assertStartsWith("σ", "ς", "UTF8_BINARY", false);
+ assertStartsWith("σ", "Σ", "UTF8_BINARY", false);
+ assertStartsWith("ς", "σ", "UTF8_BINARY", false);
+ assertStartsWith("ς", "ς", "UTF8_BINARY", true);
+ assertStartsWith("ς", "Σ", "UTF8_BINARY", false);
+ assertStartsWith("Σ", "σ", "UTF8_BINARY", false);
+ assertStartsWith("Σ", "ς", "UTF8_BINARY", false);
+ assertStartsWith("Σ", "Σ", "UTF8_BINARY", true);
+ assertStartsWith("σ", "σ", "UTF8_LCASE", true);
+ assertStartsWith("σ", "ς", "UTF8_LCASE", true);
+ assertStartsWith("σ", "Σ", "UTF8_LCASE", true);
+ assertStartsWith("ς", "σ", "UTF8_LCASE", true);
+ assertStartsWith("ς", "ς", "UTF8_LCASE", true);
+ assertStartsWith("ς", "Σ", "UTF8_LCASE", true);
+ assertStartsWith("Σ", "σ", "UTF8_LCASE", true);
+ assertStartsWith("Σ", "ς", "UTF8_LCASE", true);
+ assertStartsWith("Σ", "Σ", "UTF8_LCASE", true);
+ assertStartsWith("σ", "σ", "UNICODE", true);
+ assertStartsWith("σ", "ς", "UNICODE", false);
+ assertStartsWith("σ", "Σ", "UNICODE", false);
+ assertStartsWith("ς", "σ", "UNICODE", false);
+ assertStartsWith("ς", "ς", "UNICODE", true);
+ assertStartsWith("ς", "Σ", "UNICODE", false);
+ assertStartsWith("Σ", "σ", "UNICODE", false);
+ assertStartsWith("Σ", "ς", "UNICODE", false);
+ assertStartsWith("Σ", "Σ", "UNICODE", true);
+ assertStartsWith("σ", "σ", "UNICODE_CI", true);
+ assertStartsWith("σ", "ς", "UNICODE_CI", true);
+ assertStartsWith("σ", "Σ", "UNICODE_CI", true);
+ assertStartsWith("ς", "σ", "UNICODE_CI", true);
+ assertStartsWith("ς", "ς", "UNICODE_CI", true);
+ assertStartsWith("ς", "Σ", "UNICODE_CI", true);
+ assertStartsWith("Σ", "σ", "UNICODE_CI", true);
+ assertStartsWith("Σ", "ς", "UNICODE_CI", true);
+ assertStartsWith("Σ", "Σ", "UNICODE_CI", true);
+ assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", true);
+ assertStartsWith("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false);
+ assertStartsWith("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false);
+ assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", true);
+ assertStartsWith("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", true);
+ assertStartsWith("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", true);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", false);
+ assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE", true);
+ assertStartsWith("ΣΑΛΑΤΑ", "σ", "UNICODE", false);
+ assertStartsWith("ΣΑΛΑΤΑ", "ς", "UNICODE", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false);
+ assertStartsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", true);
+ assertStartsWith("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", true);
+ assertStartsWith("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", true);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", false);
+ assertStartsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", false);
+ // Surrogate pairs.
+ assertStartsWith("a🙃b🙃c", "x", "UTF8_BINARY", false);
+ assertStartsWith("a🙃b🙃c", "x", "UTF8_LCASE", false);
+ assertStartsWith("a🙃b🙃c", "x", "UNICODE", false);
+ assertStartsWith("a🙃b🙃c", "x", "UNICODE_CI", false);
+ assertStartsWith("a🙃b🙃c", "b", "UTF8_BINARY", false);
+ assertStartsWith("a🙃b🙃c", "b", "UTF8_LCASE", false);
+ assertStartsWith("a🙃b🙃c", "b", "UNICODE", false);
+ assertStartsWith("a🙃b🙃c", "b", "UNICODE_CI", false);
+ assertStartsWith("a🙃b🙃c", "a🙃b", "UTF8_BINARY", true);
+ assertStartsWith("a🙃b🙃c", "a🙃b", "UTF8_LCASE", true);
+ assertStartsWith("a🙃b🙃c", "a🙃b", "UNICODE", true);
+ assertStartsWith("a🙃b🙃c", "a🙃b", "UNICODE_CI", true);
+ assertStartsWith("a🙃b🙃c", "b🙃c", "UTF8_BINARY", false);
+ assertStartsWith("a🙃b🙃c", "b🙃c", "UTF8_LCASE", false);
+ assertStartsWith("a🙃b🙃c", "b🙃c", "UNICODE", false);
+ assertStartsWith("a🙃b🙃c", "b🙃c", "UNICODE_CI", false);
+ assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true);
+ assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true);
+ assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true);
+ assertStartsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true);
+ assertStartsWith("😀😆😃😄", "😄😆", "UTF8_BINARY", false);
+ assertStartsWith("😀😆😃😄", "😄😆", "UTF8_LCASE", false);
+ assertStartsWith("😀😆😃😄", "😄😆", "UNICODE", false);
+ assertStartsWith("😀😆😃😄", "😄😆", "UNICODE_CI", false);
+ assertStartsWith("😀😆😃😄", "😆😃", "UTF8_BINARY", false);
+ assertStartsWith("😀😆😃😄", "😆😃", "UTF8_LCASE", false);
+ assertStartsWith("😀😆😃😄", "😆😃", "UNICODE", false);
+ assertStartsWith("😀😆😃😄", "😆😃", "UNICODE_CI", false);
+ assertStartsWith("😀😆😃😄", "😀😆", "UTF8_BINARY", true);
+ assertStartsWith("😀😆😃😄", "😀😆", "UTF8_LCASE", true);
+ assertStartsWith("😀😆😃😄", "😀😆", "UNICODE", true);
+ assertStartsWith("😀😆😃😄", "😀😆", "UNICODE_CI", true);
+ assertStartsWith("😀😆😃😄", "😃😄", "UTF8_BINARY", false);
+ assertStartsWith("😀😆😃😄", "😃😄", "UTF8_LCASE", false);
+ assertStartsWith("😀😆😃😄", "😃😄", "UNICODE", false);
+ assertStartsWith("😀😆😃😄", "😃😄", "UNICODE_CI", false);
+ assertStartsWith("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true);
+ assertStartsWith("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", true);
+ assertStartsWith("😀😆😃😄", "😀😆😃😄", "UNICODE", true);
+ assertStartsWith("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true);
+ assertStartsWith("𐐅", "𐐅", "UTF8_BINARY", true);
+ assertStartsWith("𐐅", "𐐅", "UTF8_LCASE", true);
+ assertStartsWith("𐐅", "𐐅", "UNICODE", true);
+ assertStartsWith("𐐅", "𐐅", "UNICODE_CI", true);
+ assertStartsWith("𐐅", "𐐭", "UTF8_BINARY", false);
+ assertStartsWith("𐐅", "𐐭", "UTF8_LCASE", true);
+ assertStartsWith("𐐅", "𐐭", "UNICODE", false);
+ assertStartsWith("𐐅", "𐐭", "UNICODE_CI", true);
+ assertStartsWith("𝔸", "𝔸", "UTF8_BINARY", true);
+ assertStartsWith("𝔸", "𝔸", "UTF8_LCASE", true);
+ assertStartsWith("𝔸", "𝔸", "UNICODE", true);
+ assertStartsWith("𝔸", "𝔸", "UNICODE_CI", true);
}
- private void assertEndsWith(String pattern, String suffix, String collationName, boolean expected)
- throws SparkException {
+ /**
+ * Verify the behaviour of the `EndsWith` collation support class.
+ */
+
+ private void assertEndsWith(String pattern, String suffix, String collationName,
+ boolean expected) throws SparkException {
UTF8String l = UTF8String.fromString(pattern);
UTF8String r = UTF8String.fromString(suffix);
int collationId = CollationFactory.collationNameToId(collationName);
@@ -427,20 +770,42 @@ private void assertEndsWith(String pattern, String suffix, String collationName,
@Test
public void testEndsWith() throws SparkException {
- // Edge cases
- assertEndsWith("", "", "UTF8_BINARY", true);
- assertEndsWith("c", "", "UTF8_BINARY", true);
- assertEndsWith("", "c", "UTF8_BINARY", false);
- assertEndsWith("", "", "UNICODE", true);
- assertEndsWith("c", "", "UNICODE", true);
- assertEndsWith("", "c", "UNICODE", false);
- assertEndsWith("", "", "UTF8_LCASE", true);
- assertEndsWith("c", "", "UTF8_LCASE", true);
- assertEndsWith("", "c", "UTF8_LCASE", false);
- assertEndsWith("", "", "UNICODE_CI", true);
- assertEndsWith("c", "", "UNICODE_CI", true);
- assertEndsWith("", "c", "UNICODE_CI", false);
- // Basic tests
+ for (String collationName: testSupportedCollations) {
+ // Empty strings.
+ assertEndsWith("", "", collationName, true);
+ assertEndsWith("a", "", collationName, true);
+ assertEndsWith("", "x", collationName, false);
+ // Basic tests.
+ assertEndsWith("a", "a", collationName, true);
+ assertEndsWith("_a_", "a_", collationName, true);
+ assertEndsWith("_a_", "a", collationName, false);
+ assertEndsWith("%a%", "a%", collationName, true);
+ assertEndsWith("%a%", "a", collationName, false);
+ assertEndsWith("*a*", "a*", collationName, true);
+ assertEndsWith("*a*", "a", collationName, false);
+ assertEndsWith("?a?", "a?", collationName, true);
+ assertEndsWith("?a?", "a", collationName, false);
+ assertEndsWith("/a/", "a/", collationName, true);
+ assertEndsWith("/a/", "a", collationName, false);
+ assertEndsWith("abcde", "xyz", collationName, false);
+ assertEndsWith("abcde", "bcd", collationName, false);
+ assertEndsWith("abcde", "abc", collationName, false);
+ assertEndsWith("abcde", "cde", collationName, true);
+ assertEndsWith("abcde", "abcde", collationName, true);
+ assertEndsWith("你好", "x", collationName, false);
+ assertEndsWith("你好", "你", collationName, false);
+ assertEndsWith("你好", "好", collationName, true);
+ assertEndsWith("你好", "你好", collationName, true);
+ assertEndsWith("Γειά", "x", collationName, false);
+ assertEndsWith("Γειά", "ειά", collationName, true);
+ assertEndsWith("Γειά", "Γει", collationName, false);
+ assertEndsWith("Γειά", "Γειά", collationName, true);
+ assertEndsWith("Здраво", "x", collationName, false);
+ assertEndsWith("Здраво", "драво", collationName, true);
+ assertEndsWith("Здраво", "Здрав", collationName, false);
+ assertEndsWith("Здраво", "Здраво", collationName, true);
+ }
+ // Advanced tests.
assertEndsWith("abcde", "cde", "UTF8_BINARY", true);
assertEndsWith("abcde", "bde", "UTF8_BINARY", false);
assertEndsWith("abcde", "fgh", "UTF8_BINARY", false);
@@ -454,25 +819,6 @@ public void testEndsWith() throws SparkException {
assertEndsWith("abcde", "CDe", "UNICODE_CI", true);
assertEndsWith("abcde", "bcd", "UNICODE_CI", false);
assertEndsWith("abcde", "123", "UNICODE_CI", false);
- // Case variation
- assertEndsWith("aBcDe", "cde", "UTF8_BINARY", false);
- assertEndsWith("aBcDe", "cDe", "UTF8_BINARY", true);
- assertEndsWith("aBcDe", "abcde", "UNICODE", false);
- assertEndsWith("aBcDe", "aBcDe", "UNICODE", true);
- assertEndsWith("aBcDe", "cde", "UTF8_LCASE", true);
- assertEndsWith("aBcDe", "CDE", "UTF8_LCASE", true);
- assertEndsWith("aBcDe", "abcde", "UNICODE_CI", true);
- assertEndsWith("aBcDe", "AbCdE", "UNICODE_CI", true);
- // Accent variation
- assertEndsWith("aBcDe", "ćde", "UTF8_BINARY", false);
- assertEndsWith("aBcDe", "ćDe", "UTF8_BINARY", false);
- assertEndsWith("aBcDe", "abćde", "UNICODE", false);
- assertEndsWith("aBcDe", "aBćDe", "UNICODE", false);
- assertEndsWith("aBcDe", "ćde", "UTF8_LCASE", false);
- assertEndsWith("aBcDe", "ĆDE", "UTF8_LCASE", false);
- assertEndsWith("aBcDe", "abćde", "UNICODE_CI", false);
- assertEndsWith("aBcDe", "AbĆdE", "UNICODE_CI", false);
- // Variable byte length characters
assertEndsWith("ab世De", "世De", "UTF8_BINARY", true);
assertEndsWith("ab世De", "世dE", "UTF8_BINARY", false);
assertEndsWith("äbćδe", "ćδe", "UTF8_BINARY", true);
@@ -489,53 +835,196 @@ public void testEndsWith() throws SparkException {
assertEndsWith("ab世De", "AB世dE", "UNICODE_CI", true);
assertEndsWith("äbćδe", "ÄbćδE", "UNICODE_CI", true);
assertEndsWith("äbćδe", "ÄBcΔÉ", "UNICODE_CI", false);
- // Characters with the same binary lowercase representation
assertEndsWith("The Kelvin", "Kelvin", "UTF8_LCASE", true);
assertEndsWith("The Kelvin", "Kelvin", "UTF8_LCASE", true);
assertEndsWith("The KKelvin", "KKelvin", "UTF8_LCASE", true);
assertEndsWith("The 2 Kelvin", "2 Kelvin", "UTF8_LCASE", true);
assertEndsWith("The 2 Kelvin", "2 Kelvin", "UTF8_LCASE", true);
assertEndsWith("The KKelvin", "KKelvin,", "UTF8_LCASE", false);
- // Case-variable character length
- assertEndsWith("i̇", "\u0307", "UNICODE_CI", false);
- assertEndsWith("i̇", "İ", "UNICODE_CI", true);
+ assertEndsWith("Ћевапчићи", "цици", "sr_Cyrl_CI_AI", false);
+ assertEndsWith("Ћевапчићи", "чИЋи", "sr_Cyrl_CI_AI", true);
+ assertEndsWith("Ćevapčići", "cici", "SR_CI", false);
+ assertEndsWith("Ćevapčići", "cici", "SR_CI_AI", true);
+ assertEndsWith("Ćevapčići", "čići", "SR", true);
+ // Case variation.
+ assertEndsWith("aBcDe", "cde", "UTF8_BINARY", false);
+ assertEndsWith("aBcDe", "cDe", "UTF8_BINARY", true);
+ assertEndsWith("aBcDe", "abcde", "UNICODE", false);
+ assertEndsWith("aBcDe", "aBcDe", "UNICODE", true);
+ assertEndsWith("aBcDe", "cde", "UTF8_LCASE", true);
+ assertEndsWith("aBcDe", "CDE", "UTF8_LCASE", true);
+ assertEndsWith("aBcDe", "abcde", "UNICODE_CI", true);
+ assertEndsWith("aBcDe", "AbCdE", "UNICODE_CI", true);
+ // Accent variation.
+ assertEndsWith("aBcDe", "ćde", "UTF8_BINARY", false);
+ assertEndsWith("aBcDe", "ćDe", "UTF8_BINARY", false);
+ assertEndsWith("aBcDe", "abćde", "UNICODE", false);
+ assertEndsWith("aBcDe", "aBćDe", "UNICODE", false);
+ assertEndsWith("aBcDe", "ćde", "UTF8_LCASE", false);
+ assertEndsWith("aBcDe", "ĆDE", "UTF8_LCASE", false);
+ assertEndsWith("aBcDe", "abćde", "UNICODE_CI", false);
+ assertEndsWith("aBcDe", "AbĆdE", "UNICODE_CI", false);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertEndsWith("i\u0307", "\u0307", "UNICODE_CI", false);
+ assertEndsWith("i\u0307", "İ", "UNICODE_CI", true);
assertEndsWith("İ", "i", "UNICODE_CI", false);
assertEndsWith("İİİ", "i̇i̇", "UNICODE_CI", true);
assertEndsWith("İİİ", "ii̇", "UNICODE_CI", false);
assertEndsWith("İi̇İ", "İi̇", "UNICODE_CI", true);
assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", "UNICODE_CI", false);
- assertEndsWith("the i̇o", "io", "UNICODE_CI", false);
- assertEndsWith("the i̇o", "Io", "UNICODE_CI", false);
- assertEndsWith("the i̇o", "i̇o", "UNICODE_CI", true);
- assertEndsWith("the i̇o", "İo", "UNICODE_CI", true);
+ assertEndsWith("the i\u0307o", "io", "UNICODE_CI", false);
+ assertEndsWith("the i\u0307o", "Io", "UNICODE_CI", false);
+ assertEndsWith("the i\u0307o", "i\u0307o", "UNICODE_CI", true);
+ assertEndsWith("the i\u0307o", "İo", "UNICODE_CI", true);
assertEndsWith("the İo", "io", "UNICODE_CI", false);
assertEndsWith("the İo", "Io", "UNICODE_CI", false);
- assertEndsWith("the İo", "i̇o", "UNICODE_CI", true);
+ assertEndsWith("the İo", "i\u0307o", "UNICODE_CI", true);
assertEndsWith("the İo", "İo", "UNICODE_CI", true);
- assertEndsWith("i̇", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI
- assertEndsWith("i̇", "İ", "UTF8_LCASE", true);
+ assertEndsWith("i\u0307", "\u0307", "UTF8_LCASE", true); // != UNICODE_CI
+ assertEndsWith("i\u0307", "İ", "UTF8_LCASE", true);
assertEndsWith("İ", "\u0307", "UTF8_LCASE", false);
assertEndsWith("İİİ", "i̇i̇", "UTF8_LCASE", true);
assertEndsWith("İİİ", "ii̇", "UTF8_LCASE", false);
assertEndsWith("İi̇İ", "İi̇", "UTF8_LCASE", true);
assertEndsWith("i̇İi̇i̇", "\u0307İi̇İ", "UTF8_LCASE", true); // != UNICODE_CI
assertEndsWith("i̇İi̇i̇", "\u0307İİ", "UTF8_LCASE", false);
- assertEndsWith("the i̇o", "io", "UTF8_LCASE", false);
- assertEndsWith("the i̇o", "Io", "UTF8_LCASE", false);
- assertEndsWith("the i̇o", "i̇o", "UTF8_LCASE", true);
- assertEndsWith("the i̇o", "İo", "UTF8_LCASE", true);
+ assertEndsWith("the i\u0307o", "io", "UTF8_LCASE", false);
+ assertEndsWith("the i\u0307o", "Io", "UTF8_LCASE", false);
+ assertEndsWith("the i\u0307o", "i\u0307o", "UTF8_LCASE", true);
+ assertEndsWith("the i\u0307o", "İo", "UTF8_LCASE", true);
assertEndsWith("the İo", "io", "UTF8_LCASE", false);
assertEndsWith("the İo", "Io", "UTF8_LCASE", false);
- assertEndsWith("the İo", "i̇o", "UTF8_LCASE", true);
+ assertEndsWith("the İo", "i\u0307o", "UTF8_LCASE", true);
assertEndsWith("the İo", "İo", "UTF8_LCASE", true);
+ assertEndsWith("İo", "İo", "UTF8_LCASE", true);
+ assertEndsWith("İo", "i̇o", "UTF8_LCASE", true);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertEndsWith("σ", "σ", "UTF8_BINARY", true);
+ assertEndsWith("σ", "ς", "UTF8_BINARY", false);
+ assertEndsWith("σ", "Σ", "UTF8_BINARY", false);
+ assertEndsWith("ς", "σ", "UTF8_BINARY", false);
+ assertEndsWith("ς", "ς", "UTF8_BINARY", true);
+ assertEndsWith("ς", "Σ", "UTF8_BINARY", false);
+ assertEndsWith("Σ", "σ", "UTF8_BINARY", false);
+ assertEndsWith("Σ", "ς", "UTF8_BINARY", false);
+ assertEndsWith("Σ", "Σ", "UTF8_BINARY", true);
+ assertEndsWith("σ", "σ", "UTF8_LCASE", true);
+ assertEndsWith("σ", "ς", "UTF8_LCASE", true);
+ assertEndsWith("σ", "Σ", "UTF8_LCASE", true);
+ assertEndsWith("ς", "σ", "UTF8_LCASE", true);
+ assertEndsWith("ς", "ς", "UTF8_LCASE", true);
+ assertEndsWith("ς", "Σ", "UTF8_LCASE", true);
+ assertEndsWith("Σ", "σ", "UTF8_LCASE", true);
+ assertEndsWith("Σ", "ς", "UTF8_LCASE", true);
+ assertEndsWith("Σ", "Σ", "UTF8_LCASE", true);
+ assertEndsWith("σ", "σ", "UNICODE", true);
+ assertEndsWith("σ", "ς", "UNICODE", false);
+ assertEndsWith("σ", "Σ", "UNICODE", false);
+ assertEndsWith("ς", "σ", "UNICODE", false);
+ assertEndsWith("ς", "ς", "UNICODE", true);
+ assertEndsWith("ς", "Σ", "UNICODE", false);
+ assertEndsWith("Σ", "σ", "UNICODE", false);
+ assertEndsWith("Σ", "ς", "UNICODE", false);
+ assertEndsWith("Σ", "Σ", "UNICODE", true);
+ assertEndsWith("σ", "σ", "UNICODE_CI", true);
+ assertEndsWith("σ", "ς", "UNICODE_CI", true);
+ assertEndsWith("σ", "Σ", "UNICODE_CI", true);
+ assertEndsWith("ς", "σ", "UNICODE_CI", true);
+ assertEndsWith("ς", "ς", "UNICODE_CI", true);
+ assertEndsWith("ς", "Σ", "UNICODE_CI", true);
+ assertEndsWith("Σ", "σ", "UNICODE_CI", true);
+ assertEndsWith("Σ", "ς", "UNICODE_CI", true);
+ assertEndsWith("Σ", "Σ", "UNICODE_CI", true);
+ assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_BINARY", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "σ", "UTF8_BINARY", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "ς", "UTF8_BINARY", false);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_BINARY", true);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_BINARY", false);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_BINARY", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UTF8_LCASE", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "σ", "UTF8_LCASE", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "ς", "UTF8_LCASE", false);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UTF8_LCASE", true);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UTF8_LCASE", true);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UTF8_LCASE", true);
+ assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "σ", "UNICODE", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "ς", "UNICODE", false);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE", true);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE", false);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "Σ", "UNICODE_CI", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "σ", "UNICODE_CI", false);
+ assertEndsWith("ΣΑΛΑΤΑ", "ς", "UNICODE_CI", false);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "Σ", "UNICODE_CI", true);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "σ", "UNICODE_CI", true);
+ assertEndsWith("ΘΑΛΑΣΣΙΝΟΣ", "ς", "UNICODE_CI", true);
+ // Surrogate pairs.
+ assertEndsWith("a🙃b🙃c", "x", "UTF8_BINARY", false);
+ assertEndsWith("a🙃b🙃c", "x", "UTF8_LCASE", false);
+ assertEndsWith("a🙃b🙃c", "x", "UNICODE", false);
+ assertEndsWith("a🙃b🙃c", "x", "UNICODE_CI", false);
+ assertEndsWith("a🙃b🙃c", "b", "UTF8_BINARY", false);
+ assertEndsWith("a🙃b🙃c", "b", "UTF8_LCASE", false);
+ assertEndsWith("a🙃b🙃c", "b", "UNICODE", false);
+ assertEndsWith("a🙃b🙃c", "b", "UNICODE_CI", false);
+ assertEndsWith("a🙃b🙃c", "a🙃b", "UTF8_BINARY", false);
+ assertEndsWith("a🙃b🙃c", "a🙃b", "UTF8_LCASE", false);
+ assertEndsWith("a🙃b🙃c", "a🙃b", "UNICODE", false);
+ assertEndsWith("a🙃b🙃c", "a🙃b", "UNICODE_CI", false);
+ assertEndsWith("a🙃b🙃c", "b🙃c", "UTF8_BINARY", true);
+ assertEndsWith("a🙃b🙃c", "b🙃c", "UTF8_LCASE", true);
+ assertEndsWith("a🙃b🙃c", "b🙃c", "UNICODE", true);
+ assertEndsWith("a🙃b🙃c", "b🙃c", "UNICODE_CI", true);
+ assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_BINARY", true);
+ assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UTF8_LCASE", true);
+ assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE", true);
+ assertEndsWith("a🙃b🙃c", "a🙃b🙃c", "UNICODE_CI", true);
+ assertEndsWith("😀😆😃😄", "😄😆", "UTF8_BINARY", false);
+ assertEndsWith("😀😆😃😄", "😄😆", "UTF8_LCASE", false);
+ assertEndsWith("😀😆😃😄", "😄😆", "UNICODE", false);
+ assertEndsWith("😀😆😃😄", "😄😆", "UNICODE_CI", false);
+ assertEndsWith("😀😆😃😄", "😆😃", "UTF8_BINARY", false);
+ assertEndsWith("😀😆😃😄", "😆😃", "UTF8_LCASE", false);
+ assertEndsWith("😀😆😃😄", "😆😃", "UNICODE", false);
+ assertEndsWith("😀😆😃😄", "😆😃", "UNICODE_CI", false);
+ assertEndsWith("😀😆😃😄", "😀😆", "UTF8_BINARY", false);
+ assertEndsWith("😀😆😃😄", "😀😆", "UTF8_LCASE", false);
+ assertEndsWith("😀😆😃😄", "😀😆", "UNICODE", false);
+ assertEndsWith("😀😆😃😄", "😀😆", "UNICODE_CI", false);
+ assertEndsWith("😀😆😃😄", "😃😄", "UTF8_BINARY", true);
+ assertEndsWith("😀😆😃😄", "😃😄", "UTF8_LCASE", true);
+ assertEndsWith("😀😆😃😄", "😃😄", "UNICODE", true);
+ assertEndsWith("😀😆😃😄", "😃😄", "UNICODE_CI", true);
+ assertEndsWith("😀😆😃😄", "😀😆😃😄", "UTF8_BINARY", true);
+ assertEndsWith("😀😆😃😄", "😀😆😃😄", "UTF8_LCASE", true);
+ assertEndsWith("😀😆😃😄", "😀😆😃😄", "UNICODE", true);
+ assertEndsWith("😀😆😃😄", "😀😆😃😄", "UNICODE_CI", true);
+ assertEndsWith("𐐅", "𐐅", "UTF8_BINARY", true);
+ assertEndsWith("𐐅", "𐐅", "UTF8_LCASE", true);
+ assertEndsWith("𐐅", "𐐅", "UNICODE", true);
+ assertEndsWith("𐐅", "𐐅", "UNICODE_CI", true);
+ assertEndsWith("𐐅", "𐐭", "UTF8_BINARY", false);
+ assertEndsWith("𐐅", "𐐭", "UTF8_LCASE", true);
+ assertEndsWith("𐐅", "𐐭", "UNICODE", false);
+ assertEndsWith("𐐅", "𐐭", "UNICODE_CI", true);
+ assertEndsWith("𝔸", "𝔸", "UTF8_BINARY", true);
+ assertEndsWith("𝔸", "𝔸", "UTF8_LCASE", true);
+ assertEndsWith("𝔸", "𝔸", "UNICODE", true);
+ assertEndsWith("𝔸", "𝔸", "UNICODE_CI", true);
}
+ /**
+ * Verify the behaviour of the `StringSplitSQL` collation support class.
+ */
+
private void assertStringSplitSQL(String str, String delimiter, String collationName,
UTF8String[] expected) throws SparkException {
UTF8String s = UTF8String.fromString(str);
UTF8String d = UTF8String.fromString(delimiter);
int collationId = CollationFactory.collationNameToId(collationName);
- assertArrayEquals(expected, CollationSupport.StringSplitSQL.exec(s, d, collationId));
+ UTF8String[] result = CollationSupport.StringSplitSQL.exec(s, d, collationId);
+ assertArrayEquals(expected, result);
}
@Test
@@ -553,7 +1042,21 @@ public void testStringSplitSQL() throws SparkException {
var array_A_B = new UTF8String[] { UTF8String.fromString("A"), UTF8String.fromString("B") };
var array_a_e = new UTF8String[] { UTF8String.fromString("ä"), UTF8String.fromString("e") };
var array_Aa_bB = new UTF8String[] { UTF8String.fromString("Aa"), UTF8String.fromString("bB") };
- // Edge cases
+ var array_Turkish_uppercase_dotted_I = new UTF8String[] { UTF8String.fromString("İ") };
+ var array_Turkish_lowercase_dotted_i = new UTF8String[] { UTF8String.fromString("i\u0307") };
+ var array_i = new UTF8String[] { UTF8String.fromString("i"), UTF8String.fromString("") };
+ var array_dot = new UTF8String[] { UTF8String.fromString(""), UTF8String.fromString("\u0307") };
+ var array_AiB = new UTF8String[] { UTF8String.fromString("Ai\u0307B") };
+ var array_AIB = new UTF8String[] { UTF8String.fromString("AİB") };
+ var array_small_nonfinal_sigma = new UTF8String[] { UTF8String.fromString("σ") };
+ var array_small_final_sigma = new UTF8String[] { UTF8String.fromString("ς") };
+ var array_capital_sigma = new UTF8String[] { UTF8String.fromString("Σ") };
+ var array_a_b_c = new UTF8String[] { UTF8String.fromString("a"), UTF8String.fromString("b"),
+ UTF8String.fromString("c") };
+ var array_emojis = new UTF8String[] { UTF8String.fromString("😀"), UTF8String.fromString("😄") };
+ var array_AOB = new UTF8String[] { UTF8String.fromString("A𐐅B") };
+ var array_AoB = new UTF8String[] { UTF8String.fromString("A𐐭B") };
+ // Empty strings.
assertStringSplitSQL("", "", "UTF8_BINARY", empty_match);
assertStringSplitSQL("abc", "", "UTF8_BINARY", array_abc);
assertStringSplitSQL("", "abc", "UTF8_BINARY", empty_match);
@@ -566,7 +1069,7 @@ public void testStringSplitSQL() throws SparkException {
assertStringSplitSQL("", "", "UNICODE_CI", empty_match);
assertStringSplitSQL("abc", "", "UNICODE_CI", array_abc);
assertStringSplitSQL("", "abc", "UNICODE_CI", empty_match);
- // Basic tests
+ // Basic tests.
assertStringSplitSQL("1a2", "a", "UTF8_BINARY", array_1_2);
assertStringSplitSQL("1a2", "A", "UTF8_BINARY", array_1a2);
assertStringSplitSQL("1a2", "b", "UTF8_BINARY", array_1a2);
@@ -580,25 +1083,7 @@ public void testStringSplitSQL() throws SparkException {
assertStringSplitSQL("1a2", "A", "UNICODE_CI", array_1_2);
assertStringSplitSQL("1a2", "1A2", "UNICODE_CI", full_match);
assertStringSplitSQL("1a2", "123", "UNICODE_CI", array_1a2);
- // Case variation
- assertStringSplitSQL("AaXbB", "x", "UTF8_BINARY", array_AaXbB);
- assertStringSplitSQL("AaXbB", "X", "UTF8_BINARY", array_Aa_bB);
- assertStringSplitSQL("AaXbB", "axb", "UNICODE", array_AaXbB);
- assertStringSplitSQL("AaXbB", "aXb", "UNICODE", array_A_B);
- assertStringSplitSQL("AaXbB", "axb", "UTF8_LCASE", array_A_B);
- assertStringSplitSQL("AaXbB", "AXB", "UTF8_LCASE", array_A_B);
- assertStringSplitSQL("AaXbB", "axb", "UNICODE_CI", array_A_B);
- assertStringSplitSQL("AaXbB", "AxB", "UNICODE_CI", array_A_B);
- // Accent variation
- assertStringSplitSQL("aBcDe", "bćd", "UTF8_BINARY", array_aBcDe);
- assertStringSplitSQL("aBcDe", "BćD", "UTF8_BINARY", array_aBcDe);
- assertStringSplitSQL("aBcDe", "abćde", "UNICODE", array_aBcDe);
- assertStringSplitSQL("aBcDe", "aBćDe", "UNICODE", array_aBcDe);
- assertStringSplitSQL("aBcDe", "bćd", "UTF8_LCASE", array_aBcDe);
- assertStringSplitSQL("aBcDe", "BĆD", "UTF8_LCASE", array_aBcDe);
- assertStringSplitSQL("aBcDe", "abćde", "UNICODE_CI", array_aBcDe);
- assertStringSplitSQL("aBcDe", "AbĆdE", "UNICODE_CI", array_aBcDe);
- // Variable byte length characters
+ // Advanced tests.
assertStringSplitSQL("äb世De", "b世D", "UTF8_BINARY", array_a_e);
assertStringSplitSQL("äb世De", "B世d", "UTF8_BINARY", array_special);
assertStringSplitSQL("äbćδe", "bćδ", "UTF8_BINARY", array_a_e);
@@ -615,10 +1100,123 @@ public void testStringSplitSQL() throws SparkException {
assertStringSplitSQL("äb世De", "AB世dE", "UNICODE_CI", array_special);
assertStringSplitSQL("äbćδe", "ÄbćδE", "UNICODE_CI", full_match);
assertStringSplitSQL("äbćδe", "ÄBcΔÉ", "UNICODE_CI", array_abcde);
+ // Case variation.
+ assertStringSplitSQL("AaXbB", "x", "UTF8_BINARY", array_AaXbB);
+ assertStringSplitSQL("AaXbB", "X", "UTF8_BINARY", array_Aa_bB);
+ assertStringSplitSQL("AaXbB", "axb", "UNICODE", array_AaXbB);
+ assertStringSplitSQL("AaXbB", "aXb", "UNICODE", array_A_B);
+ assertStringSplitSQL("AaXbB", "axb", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("AaXbB", "AXB", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("AaXbB", "axb", "UNICODE_CI", array_A_B);
+ assertStringSplitSQL("AaXbB", "AxB", "UNICODE_CI", array_A_B);
+ // Accent variation.
+ assertStringSplitSQL("aBcDe", "bćd", "UTF8_BINARY", array_aBcDe);
+ assertStringSplitSQL("aBcDe", "BćD", "UTF8_BINARY", array_aBcDe);
+ assertStringSplitSQL("aBcDe", "abćde", "UNICODE", array_aBcDe);
+ assertStringSplitSQL("aBcDe", "aBćDe", "UNICODE", array_aBcDe);
+ assertStringSplitSQL("aBcDe", "bćd", "UTF8_LCASE", array_aBcDe);
+ assertStringSplitSQL("aBcDe", "BĆD", "UTF8_LCASE", array_aBcDe);
+ assertStringSplitSQL("aBcDe", "abćde", "UNICODE_CI", array_aBcDe);
+ assertStringSplitSQL("aBcDe", "AbĆdE", "UNICODE_CI", array_aBcDe);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertStringSplitSQL("İ", "i", "UTF8_BINARY", array_Turkish_uppercase_dotted_I);
+ assertStringSplitSQL("İ", "i", "UTF8_LCASE", array_Turkish_uppercase_dotted_I);
+ assertStringSplitSQL("İ", "i", "UNICODE", array_Turkish_uppercase_dotted_I);
+ assertStringSplitSQL("İ", "i", "UNICODE_CI", array_Turkish_uppercase_dotted_I);
+ assertStringSplitSQL("İ", "\u0307", "UTF8_BINARY", array_Turkish_uppercase_dotted_I);
+ assertStringSplitSQL("İ", "\u0307", "UTF8_LCASE", array_Turkish_uppercase_dotted_I);
+ assertStringSplitSQL("İ", "\u0307", "UNICODE", array_Turkish_uppercase_dotted_I);
+ assertStringSplitSQL("İ", "\u0307", "UNICODE_CI", array_Turkish_uppercase_dotted_I);
+ assertStringSplitSQL("i\u0307", "i", "UTF8_BINARY", array_dot);
+ assertStringSplitSQL("i\u0307", "i", "UTF8_LCASE", array_dot);
+ assertStringSplitSQL("i\u0307", "i", "UNICODE", array_Turkish_lowercase_dotted_i);
+ assertStringSplitSQL("i\u0307", "i", "UNICODE_CI", array_Turkish_lowercase_dotted_i);
+ assertStringSplitSQL("i\u0307", "\u0307", "UTF8_BINARY", array_i);
+ assertStringSplitSQL("i\u0307", "\u0307", "UTF8_LCASE", array_i);
+ assertStringSplitSQL("i\u0307", "\u0307", "UNICODE", array_Turkish_lowercase_dotted_i);
+ assertStringSplitSQL("i\u0307", "\u0307", "UNICODE_CI", array_Turkish_lowercase_dotted_i);
+ assertStringSplitSQL("AİB", "İ", "UTF8_BINARY", array_A_B);
+ assertStringSplitSQL("AİB", "İ", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("AİB", "İ", "UNICODE", array_A_B);
+ assertStringSplitSQL("AİB", "İ", "UNICODE_CI", array_A_B);
+ assertStringSplitSQL("AİB", "i\u0307", "UTF8_BINARY", array_AIB);
+ assertStringSplitSQL("AİB", "i\u0307", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("AİB", "i\u0307", "UNICODE", array_AIB);
+ assertStringSplitSQL("AİB", "i\u0307", "UNICODE_CI", array_A_B);
+ assertStringSplitSQL("Ai\u0307B", "İ", "UTF8_BINARY", array_AiB);
+ assertStringSplitSQL("Ai\u0307B", "İ", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("Ai\u0307B", "İ", "UNICODE", array_AiB);
+ assertStringSplitSQL("Ai\u0307B", "İ", "UNICODE_CI", array_A_B);
+ assertStringSplitSQL("Ai\u0307B", "i\u0307", "UTF8_BINARY", array_A_B);
+ assertStringSplitSQL("Ai\u0307B", "i\u0307", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("Ai\u0307B", "i\u0307", "UNICODE", array_A_B);
+ assertStringSplitSQL("Ai\u0307B", "i\u0307", "UNICODE_CI", array_A_B);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertStringSplitSQL("σ", "σ", "UTF8_BINARY", full_match);
+ assertStringSplitSQL("σ", "σ", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("σ", "σ", "UNICODE", full_match);
+ assertStringSplitSQL("σ", "σ", "UNICODE_CI", full_match);
+ assertStringSplitSQL("σ", "ς", "UTF8_BINARY", array_small_nonfinal_sigma);
+ assertStringSplitSQL("σ", "ς", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("σ", "ς", "UNICODE", array_small_nonfinal_sigma);
+ assertStringSplitSQL("σ", "ς", "UNICODE_CI", full_match);
+ assertStringSplitSQL("σ", "Σ", "UTF8_BINARY", array_small_nonfinal_sigma);
+ assertStringSplitSQL("σ", "Σ", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("σ", "Σ", "UNICODE", array_small_nonfinal_sigma);
+ assertStringSplitSQL("σ", "Σ", "UNICODE_CI", full_match);
+ assertStringSplitSQL("ς", "σ", "UTF8_BINARY", array_small_final_sigma);
+ assertStringSplitSQL("ς", "σ", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("ς", "σ", "UNICODE", array_small_final_sigma);
+ assertStringSplitSQL("ς", "σ", "UNICODE_CI", full_match);
+ assertStringSplitSQL("ς", "ς", "UTF8_BINARY", full_match);
+ assertStringSplitSQL("ς", "ς", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("ς", "ς", "UNICODE", full_match);
+ assertStringSplitSQL("ς", "ς", "UNICODE_CI", full_match);
+ assertStringSplitSQL("ς", "Σ", "UTF8_BINARY", array_small_final_sigma);
+ assertStringSplitSQL("ς", "Σ", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("ς", "Σ", "UNICODE", array_small_final_sigma);
+ assertStringSplitSQL("ς", "Σ", "UNICODE_CI", full_match);
+ assertStringSplitSQL("Σ", "σ", "UTF8_BINARY", array_capital_sigma);
+ assertStringSplitSQL("Σ", "σ", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("Σ", "σ", "UNICODE", array_capital_sigma);
+ assertStringSplitSQL("Σ", "σ", "UNICODE_CI", full_match);
+ assertStringSplitSQL("Σ", "ς", "UTF8_BINARY", array_capital_sigma);
+ assertStringSplitSQL("Σ", "ς", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("Σ", "ς", "UNICODE", array_capital_sigma);
+ assertStringSplitSQL("Σ", "ς", "UNICODE_CI", full_match);
+ assertStringSplitSQL("Σ", "Σ", "UTF8_BINARY", full_match);
+ assertStringSplitSQL("Σ", "Σ", "UTF8_LCASE", full_match);
+ assertStringSplitSQL("Σ", "Σ", "UNICODE", full_match);
+ assertStringSplitSQL("Σ", "Σ", "UNICODE_CI", full_match);
+ // Surrogate pairs.
+ assertStringSplitSQL("a🙃b🙃c", "🙃", "UTF8_BINARY", array_a_b_c);
+ assertStringSplitSQL("a🙃b🙃c", "🙃", "UTF8_LCASE", array_a_b_c);
+ assertStringSplitSQL("a🙃b🙃c", "🙃", "UNICODE", array_a_b_c);
+ assertStringSplitSQL("a🙃b🙃c", "🙃", "UNICODE_CI", array_a_b_c);
+ assertStringSplitSQL("😀😆😃😄", "😆😃", "UTF8_BINARY", array_emojis);
+ assertStringSplitSQL("😀😆😃😄", "😆😃", "UTF8_LCASE", array_emojis);
+ assertStringSplitSQL("😀😆😃😄", "😆😃", "UNICODE", array_emojis);
+ assertStringSplitSQL("😀😆😃😄", "😆😃", "UNICODE_CI", array_emojis);
+ assertStringSplitSQL("A𐐅B", "𐐅", "UTF8_BINARY", array_A_B);
+ assertStringSplitSQL("A𐐅B", "𐐅", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("A𐐅B", "𐐅", "UNICODE", array_A_B);
+ assertStringSplitSQL("A𐐅B", "𐐅", "UNICODE_CI", array_A_B);
+ assertStringSplitSQL("A𐐅B", "𐐭", "UTF8_BINARY", array_AOB);
+ assertStringSplitSQL("A𐐅B", "𐐭", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("A𐐅B", "𐐭", "UNICODE", array_AOB);
+ assertStringSplitSQL("A𐐅B", "𐐭", "UNICODE_CI", array_A_B);
+ assertStringSplitSQL("A𐐭B", "𐐅", "UTF8_BINARY", array_AoB);
+ assertStringSplitSQL("A𐐭B", "𐐅", "UTF8_LCASE", array_A_B);
+ assertStringSplitSQL("A𐐭B", "𐐅", "UNICODE", array_AoB);
+ assertStringSplitSQL("A𐐭B", "𐐅", "UNICODE_CI", array_A_B);
}
+ /**
+ * Verify the behaviour of the `Upper` collation support class.
+ */
+
private void assertUpper(String target, String collationName, String expected)
- throws SparkException {
+ throws SparkException {
UTF8String target_utf8 = UTF8String.fromString(target);
UTF8String expected_utf8 = UTF8String.fromString(expected);
int collationId = CollationFactory.collationNameToId(collationName);
@@ -631,52 +1229,57 @@ private void assertUpper(String target, String collationName, String expected)
@Test
public void testUpper() throws SparkException {
- // Edge cases
- assertUpper("", "UTF8_BINARY", "");
- assertUpper("", "UTF8_LCASE", "");
- assertUpper("", "UNICODE", "");
- assertUpper("", "UNICODE_CI", "");
- // Basic tests
- assertUpper("abcde", "UTF8_BINARY", "ABCDE");
- assertUpper("abcde", "UTF8_LCASE", "ABCDE");
- assertUpper("abcde", "UNICODE", "ABCDE");
- assertUpper("abcde", "UNICODE_CI", "ABCDE");
- // Uppercase present
- assertUpper("AbCdE", "UTF8_BINARY", "ABCDE");
- assertUpper("aBcDe", "UTF8_BINARY", "ABCDE");
- assertUpper("AbCdE", "UTF8_LCASE", "ABCDE");
- assertUpper("aBcDe", "UTF8_LCASE", "ABCDE");
- assertUpper("AbCdE", "UNICODE", "ABCDE");
- assertUpper("aBcDe", "UNICODE", "ABCDE");
- assertUpper("AbCdE", "UNICODE_CI", "ABCDE");
- assertUpper("aBcDe", "UNICODE_CI", "ABCDE");
- // Accent letters
- assertUpper("aBćDe","UTF8_BINARY", "ABĆDE");
- assertUpper("aBćDe","UTF8_LCASE", "ABĆDE");
- assertUpper("aBćDe","UNICODE", "ABĆDE");
- assertUpper("aBćDe","UNICODE_CI", "ABĆDE");
- // Variable byte length characters
- assertUpper("ab世De", "UTF8_BINARY", "AB世DE");
- assertUpper("äbćδe", "UTF8_BINARY", "ÄBĆΔE");
- assertUpper("ab世De", "UTF8_LCASE", "AB世DE");
- assertUpper("äbćδe", "UTF8_LCASE", "ÄBĆΔE");
- assertUpper("ab世De", "UNICODE", "AB世DE");
- assertUpper("äbćδe", "UNICODE", "ÄBĆΔE");
- assertUpper("ab世De", "UNICODE_CI", "AB世DE");
- assertUpper("äbćδe", "UNICODE_CI", "ÄBĆΔE");
- // Case-variable character length
- assertUpper("i\u0307o", "UTF8_BINARY","I\u0307O");
- assertUpper("i\u0307o", "UTF8_LCASE","I\u0307O");
- assertUpper("i\u0307o", "UNICODE","I\u0307O");
- assertUpper("i\u0307o", "UNICODE_CI","I\u0307O");
- assertUpper("ß fi ffi ff st ῗ", "UTF8_BINARY","SS FI FFI FF ST \u0399\u0308\u0342");
- assertUpper("ß fi ffi ff st ῗ", "UTF8_LCASE","SS FI FFI FF ST \u0399\u0308\u0342");
- assertUpper("ß fi ffi ff st ῗ", "UNICODE","SS FI FFI FF ST \u0399\u0308\u0342");
- assertUpper("ß fi ffi ff st ῗ", "UNICODE","SS FI FFI FF ST \u0399\u0308\u0342");
+ for (String collationName: testSupportedCollations) {
+ // Empty strings.
+ assertUpper("", collationName, "");
+ // Basic tests.
+ assertUpper("abcde", collationName, "ABCDE");
+ assertUpper("AbCdE", collationName, "ABCDE");
+ assertUpper("aBcDe", collationName, "ABCDE");
+ assertUpper("ABCDE", collationName, "ABCDE");
+ // Advanced tests.
+ assertUpper("aBćDe", collationName, "ABĆDE");
+ assertUpper("ab世De", collationName, "AB世DE");
+ assertUpper("äbćδe", collationName, "ÄBĆΔE");
+ assertUpper("AbĆdE", collationName, "ABĆDE");
+ assertUpper("aB世De", collationName, "AB世DE");
+ assertUpper("ÄBĆΔE", collationName, "ÄBĆΔE");
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertUpper("İ", collationName, "İ");
+ assertUpper("i\u0307", collationName,"I\u0307");
+ assertUpper("İonic", collationName, "İONIC");
+ assertUpper("i\u0307onic", collationName,"I\u0307ONIC");
+ assertUpper("FIDELİO", collationName, "FIDELİO");
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertUpper("σ", collationName, "Σ");
+ assertUpper("σ", collationName, "Σ");
+ assertUpper("ς", collationName, "Σ");
+ assertUpper("Σ", collationName, "Σ");
+ assertUpper("ΣΑΛΑΤΑ", collationName, "ΣΑΛΑΤΑ");
+ assertUpper("σαλατα", collationName, "ΣΑΛΑΤΑ");
+ assertUpper("ςαλατα", collationName, "ΣΑΛΑΤΑ");
+ assertUpper("ΘΑΛΑΣΣΙΝΟΣ", collationName, "ΘΑΛΑΣΣΙΝΟΣ");
+ assertUpper("θαλασσινοσ", collationName, "ΘΑΛΑΣΣΙΝΟΣ");
+ assertUpper("θαλασσινος", collationName, "ΘΑΛΑΣΣΙΝΟΣ");
+ // Surrogate pairs.
+ assertUpper("a🙃B🙃c", collationName, "A🙃B🙃C");
+ assertUpper("😄 😆", collationName, "😄 😆");
+ assertUpper("😀😆😃😄", collationName, "😀😆😃😄");
+ assertUpper("𝔸", collationName, "𝔸");
+ assertUpper("𐐅", collationName, "𐐅");
+ assertUpper("𐐭", collationName, "𐐅");
+ assertUpper("𐐭𝔸", collationName, "𐐅𝔸");
+ // Ligatures.
+ assertUpper("ß fi ffi ff st ῗ", collationName,"SS FI FFI FF ST \u0399\u0308\u0342");
+ }
}
+ /**
+ * Verify the behaviour of the `Lower` collation support class.
+ */
+
private void assertLower(String target, String collationName, String expected)
- throws SparkException {
+ throws SparkException {
UTF8String target_utf8 = UTF8String.fromString(target);
UTF8String expected_utf8 = UTF8String.fromString(expected);
int collationId = CollationFactory.collationNameToId(collationName);
@@ -689,48 +1292,56 @@ private void assertLower(String target, String collationName, String expected)
@Test
public void testLower() throws SparkException {
- // Edge cases
- assertLower("", "UTF8_BINARY", "");
- assertLower("", "UTF8_LCASE", "");
- assertLower("", "UNICODE", "");
- assertLower("", "UNICODE_CI", "");
- // Basic tests
- assertLower("ABCDE", "UTF8_BINARY", "abcde");
- assertLower("ABCDE", "UTF8_LCASE", "abcde");
- assertLower("ABCDE", "UNICODE", "abcde");
- assertLower("ABCDE", "UNICODE_CI", "abcde");
- // Uppercase present
- assertLower("AbCdE", "UTF8_BINARY", "abcde");
- assertLower("aBcDe", "UTF8_BINARY", "abcde");
- assertLower("AbCdE", "UTF8_LCASE", "abcde");
- assertLower("aBcDe", "UTF8_LCASE", "abcde");
- assertLower("AbCdE", "UNICODE", "abcde");
- assertLower("aBcDe", "UNICODE", "abcde");
- assertLower("AbCdE", "UNICODE_CI", "abcde");
- assertLower("aBcDe", "UNICODE_CI", "abcde");
- // Accent letters
- assertLower("AbĆdE","UTF8_BINARY", "abćde");
- assertLower("AbĆdE","UTF8_LCASE", "abćde");
- assertLower("AbĆdE","UNICODE", "abćde");
- assertLower("AbĆdE","UNICODE_CI", "abćde");
- // Variable byte length characters
- assertLower("aB世De", "UTF8_BINARY", "ab世de");
- assertLower("ÄBĆΔE", "UTF8_BINARY", "äbćδe");
- assertLower("aB世De", "UTF8_LCASE", "ab世de");
- assertLower("ÄBĆΔE", "UTF8_LCASE", "äbćδe");
- assertLower("aB世De", "UNICODE", "ab世de");
- assertLower("ÄBĆΔE", "UNICODE", "äbćδe");
- assertLower("aB世De", "UNICODE_CI", "ab世de");
- assertLower("ÄBĆΔE", "UNICODE_CI", "äbćδe");
- // Case-variable character length
- assertLower("İo", "UTF8_BINARY","i\u0307o");
- assertLower("İo", "UTF8_LCASE","i\u0307o");
- assertLower("İo", "UNICODE","i\u0307o");
- assertLower("İo", "UNICODE_CI","i\u0307o");
+ for (String collationName: testSupportedCollations) {
+ // Empty strings.
+ assertLower("", collationName, "");
+ // Basic tests.
+ assertLower("abcde", collationName, "abcde");
+ assertLower("AbCdE", collationName, "abcde");
+ assertLower("aBcDe", collationName, "abcde");
+ assertLower("ABCDE", collationName, "abcde");
+ // Advanced tests.
+      assertLower("aBćDe", collationName, "abćde");
+      assertLower("ab世De", collationName, "ab世de");
+      assertLower("äbćδe", collationName, "äbćδe");
+ assertLower("AbĆdE", collationName, "abćde");
+ assertLower("aB世De", collationName, "ab世de");
+ assertLower("ÄBĆΔE", collationName, "äbćδe");
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertLower("İ", collationName, "i\u0307");
+ assertLower("I\u0307", collationName,"i\u0307");
+ assertLower("İonic", collationName, "i\u0307onic");
+ assertLower("i\u0307onic", collationName,"i\u0307onic");
+ assertLower("FIDELİO", collationName, "fideli\u0307o");
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertLower("σ", collationName, "σ");
+ assertLower("ς", collationName, "ς");
+ assertLower("Σ", collationName, "σ");
+ assertLower("ΣΑΛΑΤΑ", collationName, "σαλατα");
+ assertLower("σαλατα", collationName, "σαλατα");
+ assertLower("ςαλατα", collationName, "ςαλατα");
+ assertLower("ΘΑΛΑΣΣΙΝΟΣ", collationName, "θαλασσινος");
+ assertLower("θαλασσινοσ", collationName, "θαλασσινοσ");
+ assertLower("θαλασσινος", collationName, "θαλασσινος");
+ // Surrogate pairs.
+ assertLower("a🙃B🙃c", collationName, "a🙃b🙃c");
+ assertLower("😄 😆", collationName, "😄 😆");
+ assertLower("😀😆😃😄", collationName, "😀😆😃😄");
+ assertLower("𝔸", collationName, "𝔸");
+ assertLower("𐐅", collationName, "𐐭");
+ assertLower("𐐭", collationName, "𐐭");
+ assertLower("𐐭𝔸", collationName, "𐐭𝔸");
+ // Ligatures.
+ assertLower("ß fi ffi ff st ῗ", collationName,"ß fi ffi ff st ῗ");
+ }
}
+ /**
+ * Verify the behaviour of the `InitCap` collation support class.
+ */
+
private void assertInitCap(String target, String collationName, String expected)
- throws SparkException {
+ throws SparkException {
UTF8String target_utf8 = UTF8String.fromString(target);
UTF8String expected_utf8 = UTF8String.fromString(expected);
int collationId = CollationFactory.collationNameToId(collationName);
@@ -741,43 +1352,65 @@ private void assertInitCap(String target, String collationName, String expected)
// Note: results should be the same in these tests for both ICU and JVM-based implementations.
}
+ private void assertInitCap(
+ String target,
+ String collationName,
+ String expectedICU,
+ String expectedNonICU) throws SparkException {
+ UTF8String target_utf8 = UTF8String.fromString(target);
+ UTF8String expectedICU_utf8 = UTF8String.fromString(expectedICU);
+ UTF8String expectedNonICU_utf8 = UTF8String.fromString(expectedNonICU);
+ int collationId = CollationFactory.collationNameToId(collationName);
+    // Testing the new ICU-based implementation of the InitCap function.
+ assertEquals(expectedICU_utf8, CollationSupport.InitCap.exec(target_utf8, collationId, true));
+    // Testing the old JVM-based implementation of the InitCap function.
+ assertEquals(expectedNonICU_utf8, CollationSupport.InitCap.exec(target_utf8, collationId,
+ false));
+ // Note: results should be the same in these tests for both ICU and JVM-based implementations.
+ }
+
@Test
public void testInitCap() throws SparkException {
- // Edge cases
- assertInitCap("", "UTF8_BINARY", "");
- assertInitCap("", "UTF8_LCASE", "");
- assertInitCap("", "UNICODE", "");
- assertInitCap("", "UNICODE_CI", "");
- // Basic tests
- assertInitCap("ABCDE", "UTF8_BINARY", "Abcde");
- assertInitCap("ABCDE", "UTF8_LCASE", "Abcde");
- assertInitCap("ABCDE", "UNICODE", "Abcde");
- assertInitCap("ABCDE", "UNICODE_CI", "Abcde");
- // Uppercase present
- assertInitCap("AbCdE", "UTF8_BINARY", "Abcde");
- assertInitCap("aBcDe", "UTF8_BINARY", "Abcde");
- assertInitCap("AbCdE", "UTF8_LCASE", "Abcde");
- assertInitCap("aBcDe", "UTF8_LCASE", "Abcde");
- assertInitCap("AbCdE", "UNICODE", "Abcde");
- assertInitCap("aBcDe", "UNICODE", "Abcde");
- assertInitCap("AbCdE", "UNICODE_CI", "Abcde");
- assertInitCap("aBcDe", "UNICODE_CI", "Abcde");
- // Accent letters
- assertInitCap("AbĆdE", "UTF8_BINARY", "Abćde");
- assertInitCap("AbĆdE", "UTF8_LCASE", "Abćde");
- assertInitCap("AbĆdE", "UNICODE", "Abćde");
- assertInitCap("AbĆdE", "UNICODE_CI", "Abćde");
- // Variable byte length characters
- assertInitCap("aB 世 De", "UTF8_BINARY", "Ab 世 De");
+ for (String collationName: testSupportedCollations) {
+ // Empty strings.
+ assertInitCap("", collationName, "");
+ // Basic tests.
+ assertInitCap("abcde", collationName, "Abcde");
+ assertInitCap("AbCdE", collationName, "Abcde");
+ assertInitCap("aBcDe", collationName, "Abcde");
+ assertInitCap("ABCDE", collationName, "Abcde");
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertInitCap("σ", collationName, "Σ");
+ assertInitCap("ς", collationName, "Σ");
+ assertInitCap("Σ", collationName, "Σ");
+ assertInitCap("ΣΑΛΑΤΑ", collationName, "Σαλατα");
+ assertInitCap("σαλατα", collationName, "Σαλατα");
+ assertInitCap("ςαλατα", collationName, "Σαλατα");
+ assertInitCap("ΘΑΛΑΣΣΙΝΟΣ", collationName, "Θαλασσινος");
+ assertInitCap("θαλασσινοσ", collationName, "Θαλασσινοσ");
+ assertInitCap("θαλασσινος", collationName, "Θαλασσινος");
+ }
+ // Advanced tests.
+ assertInitCap("aBćDe", "UTF8_BINARY", "Abćde");
+ assertInitCap("aBćDe", "UTF8_LCASE", "Abćde");
+ assertInitCap("aBćDe", "UNICODE", "Abćde");
+ assertInitCap("aBćDe", "UNICODE_CI", "Abćde");
+ assertInitCap("ab世De", "UTF8_BINARY", "Ab世de");
+ assertInitCap("ab世De", "UTF8_LCASE", "Ab世De");
+ assertInitCap("ab世De", "UNICODE", "Ab世De");
+ assertInitCap("ab世De", "UNICODE_CI", "Ab世De");
+ assertInitCap("äbćδe", "UTF8_BINARY", "Äbćδe");
+ assertInitCap("äbćδe", "UTF8_LCASE", "Äbćδe");
+ assertInitCap("äbćδe", "UNICODE", "Äbćδe");
+ assertInitCap("äbćδe", "UNICODE_CI", "Äbćδe");
assertInitCap("ÄBĆΔE", "UTF8_BINARY", "Äbćδe");
- assertInitCap("aB 世 De", "UTF8_LCASE", "Ab 世 De");
assertInitCap("ÄBĆΔE", "UTF8_LCASE", "Äbćδe");
- assertInitCap("aB 世 De", "UNICODE", "Ab 世 De");
assertInitCap("ÄBĆΔE", "UNICODE", "Äbćδe");
- assertInitCap("aB 世 de", "UNICODE_CI", "Ab 世 De");
assertInitCap("ÄBĆΔE", "UNICODE_CI", "Äbćδe");
+ assertInitCap("êéfgh", "AF_CI_AI", "Êéfgh");
+ assertInitCap("öoAÄ", "DE_CI_AI", "Öoaä");
// Case-variable character length
- assertInitCap("İo", "UTF8_BINARY", "I\u0307o");
+ assertInitCap("İo", "UTF8_BINARY", "İo", "I\u0307o");
assertInitCap("İo", "UTF8_LCASE", "İo");
assertInitCap("İo", "UNICODE", "İo");
assertInitCap("İo", "UNICODE_CI", "İo");
@@ -786,6 +1419,67 @@ public void testInitCap() throws SparkException {
assertInitCap("i\u0307o", "UNICODE", "I\u0307o");
assertInitCap("i\u0307o", "UNICODE_CI", "I\u0307o");
// Different possible word boundaries
+ assertInitCap("aB 世 de", "UTF8_BINARY", "Ab 世 De");
+ assertInitCap("aB 世 de", "UTF8_LCASE", "Ab 世 De");
+ assertInitCap("aB 世 de", "UNICODE", "Ab 世 De");
+ assertInitCap("aB 世 de", "UNICODE_CI", "Ab 世 De");
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertInitCap("İ", "UTF8_BINARY", "İ", "I\u0307");
+ assertInitCap("İ", "UTF8_LCASE", "İ");
+ assertInitCap("İ", "UNICODE", "İ");
+ assertInitCap("İ", "UNICODE_CI", "İ");
+ assertInitCap("I\u0307", "UTF8_BINARY","I\u0307");
+ assertInitCap("I\u0307", "UTF8_LCASE","I\u0307");
+ assertInitCap("I\u0307", "UNICODE","I\u0307");
+ assertInitCap("I\u0307", "UNICODE_CI","I\u0307");
+ assertInitCap("İonic", "UTF8_BINARY", "İonic", "I\u0307onic");
+ assertInitCap("İonic", "UTF8_LCASE", "İonic");
+ assertInitCap("İonic", "UNICODE", "İonic");
+ assertInitCap("İonic", "UNICODE_CI", "İonic");
+ assertInitCap("i\u0307onic", "UTF8_BINARY","I\u0307onic");
+ assertInitCap("i\u0307onic", "UTF8_LCASE","I\u0307onic");
+ assertInitCap("i\u0307onic", "UNICODE","I\u0307onic");
+ assertInitCap("i\u0307onic", "UNICODE_CI","I\u0307onic");
+ assertInitCap("FIDELİO", "UTF8_BINARY", "Fideli\u0307o");
+ assertInitCap("FIDELİO", "UTF8_LCASE", "Fideli\u0307o");
+ assertInitCap("FIDELİO", "UNICODE", "Fideli\u0307o");
+ assertInitCap("FIDELİO", "UNICODE_CI", "Fideli\u0307o");
+ // Surrogate pairs.
+ assertInitCap("a🙃B🙃c", "UTF8_BINARY", "A🙃b🙃c");
+ assertInitCap("a🙃B🙃c", "UTF8_LCASE", "A🙃B🙃C");
+ assertInitCap("a🙃B🙃c", "UNICODE", "A🙃B🙃C");
+ assertInitCap("a🙃B🙃c", "UNICODE_CI", "A🙃B🙃C");
+ assertInitCap("😄 😆", "UTF8_BINARY", "😄 😆");
+ assertInitCap("😄 😆", "UTF8_LCASE", "😄 😆");
+ assertInitCap("😄 😆", "UNICODE", "😄 😆");
+ assertInitCap("😄 😆", "UNICODE_CI", "😄 😆");
+ assertInitCap("😀😆😃😄", "UTF8_BINARY", "😀😆😃😄");
+ assertInitCap("😀😆😃😄", "UTF8_LCASE", "😀😆😃😄");
+ assertInitCap("😀😆😃😄", "UNICODE", "😀😆😃😄");
+ assertInitCap("😀😆😃😄", "UNICODE_CI", "😀😆😃😄");
+ assertInitCap("𝔸", "UTF8_BINARY", "𝔸");
+ assertInitCap("𝔸", "UTF8_LCASE", "𝔸");
+ assertInitCap("𝔸", "UNICODE", "𝔸");
+ assertInitCap("𝔸", "UNICODE_CI", "𝔸");
+ assertInitCap("𐐅", "UTF8_BINARY", "\uD801\uDC05", "𐐭");
+ assertInitCap("𐐅", "UTF8_LCASE", "𐐅");
+ assertInitCap("𐐅", "UNICODE", "𐐅");
+ assertInitCap("𐐅", "UNICODE_CI", "𐐅");
+ assertInitCap("𐐭", "UTF8_BINARY", "\uD801\uDC05", "𐐭");
+ assertInitCap("𐐭", "UTF8_LCASE", "𐐅");
+ assertInitCap("𐐭", "UNICODE", "𐐅");
+ assertInitCap("𐐭", "UNICODE_CI", "𐐅");
+ assertInitCap("𐐭𝔸", "UTF8_BINARY", "\uD801\uDC05\uD835\uDD38", "𐐭𝔸");
+ assertInitCap("𐐭𝔸", "UTF8_LCASE", "𐐅𝔸");
+ assertInitCap("𐐭𝔸", "UNICODE", "𐐅𝔸");
+ assertInitCap("𐐭𝔸", "UNICODE_CI", "𐐅𝔸");
+ // Ligatures.
+ assertInitCap("ß fi ffi ff st ῗ", "UTF8_BINARY", "Ss Fi Ffi Ff St Ϊ͂", "ß fi ffi ff st ῗ");
+ assertInitCap("ß fi ffi ff st ῗ", "UTF8_LCASE", "Ss Fi Ffi Ff St \u0399\u0308\u0342");
+ assertInitCap("ß fi ffi ff st ῗ", "UNICODE", "Ss Fi Ffi Ff St \u0399\u0308\u0342");
+    assertInitCap("ß fi ffi ff st ῗ", "UNICODE_CI", "Ss Fi Ffi Ff St \u0399\u0308\u0342");
+ assertInitCap("œ ǽ", "UTF8_BINARY", "Œ Ǽ", "Œ Ǽ");
+ // Different possible word boundaries.
assertInitCap("a b c", "UTF8_BINARY", "A B C");
assertInitCap("a b c", "UNICODE", "A B C");
assertInitCap("a b c", "UTF8_LCASE", "A B C");
@@ -802,7 +1496,7 @@ public void testInitCap() throws SparkException {
assertInitCap("a?b世c", "UNICODE", "A?B世C");
assertInitCap("a?b世c", "UTF8_LCASE", "A?B世C");
assertInitCap("a?b世c", "UNICODE_CI", "A?B世C");
- // Titlecase characters that are different from uppercase characters
+ // Titlecase characters that are different from uppercase characters.
assertInitCap("dzDZDz", "UTF8_BINARY", "Dzdzdz");
assertInitCap("dzDZDz", "UNICODE", "Dzdzdz");
assertInitCap("dzDZDz", "UTF8_LCASE", "Dzdzdz");
@@ -812,17 +1506,50 @@ public void testInitCap() throws SparkException {
assertInitCap("džaba Ljubav NJegova", "UTF8_LCASE", "Džaba Ljubav Njegova");
assertInitCap("džaba Ljubav NJegova", "UNICODE_CI", "Džaba Ljubav Njegova");
assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UTF8_BINARY",
- "ß fi ffi ff st Σημερινος Ασημενιος I\u0307ota");
+ "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota","ß fi ffi ff st Σημερινος Ασημενιος I\u0307ota");
assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UTF8_LCASE",
"Ss Fi Ffi Ff St Σημερινος Ασημενιος İota");
assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE",
"Ss Fi Ffi Ff St Σημερινος Ασημενιος İota");
- assertInitCap("ß fi ffi ff st ΣΗΜΕΡΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE_CI",
- "Ss Fi Ffi Ff St Σημερινος Ασημενιος İota");
+ assertInitCap("ß fi ffi ff st ΣΗΜΕΡςΙΝΟΣ ΑΣΗΜΕΝΙΟΣ İOTA", "UNICODE_CI",
+ "Ss Fi Ffi Ff St Σημερςινος Ασημενιος İota");
+ // Characters that map to multiple characters when titlecased and lowercased.
+ assertInitCap("ß fi ffi ff st İOTA", "UTF8_BINARY", "Ss Fi Ffi Ff St İota", "ß fi ffi ff st İota");
+ assertInitCap("ß fi ffi ff st OİOTA", "UTF8_BINARY",
+ "Ss Fi Ffi Ff St Oi\u0307ota", "ß fi ffi ff st Oi̇ota");
+ // Lowercasing Greek letter sigma ('Σ') when case-ignorable character present.
+ assertInitCap("`Σ", "UTF8_BINARY", "`σ", "`σ");
+ assertInitCap("1`Σ`` AΣ", "UTF8_BINARY", "1`σ`` Aς", "1`σ`` Aς");
+ assertInitCap("a1`Σ``", "UTF8_BINARY", "A1`σ``", "A1`σ``");
+ assertInitCap("a`Σ``", "UTF8_BINARY", "A`ς``", "A`σ``");
+ assertInitCap("a`Σ``1", "UTF8_BINARY", "A`ς``1", "A`σ``1");
+ assertInitCap("a`Σ``A", "UTF8_BINARY", "A`σ``a", "A`σ``a");
+ assertInitCap("ΘΑ�Σ�ΟΣ�", "UTF8_BINARY", "Θα�σ�ος�", "Θα�σ�ος�");
+ assertInitCap("ΘΑᵩΣ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θαᵩς�οᵩςᵩ�", "Θαᵩς�οᵩςᵩ�");
+ assertInitCap("ΘΑ�ᵩΣ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θα�ᵩσ�οᵩςᵩ�", "Θα�ᵩσ�οᵩςᵩ�");
+ assertInitCap("ΘΑ�ᵩΣᵩ�ΟᵩΣᵩ�", "UTF8_BINARY", "Θα�ᵩσᵩ�οᵩςᵩ�", "Θα�ᵩσᵩ�οᵩςᵩ�");
+ assertInitCap("ΘΑ�Σ�Ο�Σ�", "UTF8_BINARY", "Θα�σ�ο�σ�", "Θα�σ�ο�σ�");
+ // Disallowed bytes and invalid sequences.
+ assertInitCap(UTF8String.fromBytes(new byte[] { (byte)0xC0, (byte)0xC1, (byte)0xF5}).toString(),
+ "UTF8_BINARY", "���", "���");
+ assertInitCap(UTF8String.fromBytes(
+ new byte[]{(byte)0xC0, (byte)0xC1, (byte)0xF5, 0x20, 0x61, 0x41, (byte)0xC0}).toString(),
+ "UTF8_BINARY",
+ "��� Aa�", "��� Aa�");
+ assertInitCap(UTF8String.fromBytes(new byte[]{(byte)0xC2,(byte)0xC2}).toString(),
+ "UTF8_BINARY", "��", "��");
+ assertInitCap(UTF8String.fromBytes(
+ new byte[]{0x61, 0x41, (byte)0xC2, (byte)0xC2, 0x41}).toString(),
+ "UTF8_BINARY",
+ "Aa��a", "Aa��a");
}
- private void assertStringInstr(String string, String substring, String collationName,
- Integer expected) throws SparkException {
+ /**
+ * Verify the behaviour of the `StringInstr` collation support class.
+ */
+
+ private void assertStringInstr(String string, String substring,
+ String collationName, int expected) throws SparkException {
UTF8String str = UTF8String.fromString(string);
UTF8String substr = UTF8String.fromString(substring);
int collationId = CollationFactory.collationNameToId(collationName);
@@ -831,143 +1558,402 @@ private void assertStringInstr(String string, String substring, String collation
@Test
public void testStringInstr() throws SparkException {
- assertStringInstr("aaads", "Aa", "UTF8_BINARY", 0);
- assertStringInstr("aaaDs", "de", "UTF8_BINARY", 0);
+ // Empty strings.
+ assertStringInstr("", "", "UTF8_BINARY", 1);
+ assertStringInstr("", "", "UTF8_LCASE", 1);
+ assertStringInstr("", "", "UNICODE_CI", 1);
+ assertStringInstr("", "", "UNICODE", 1);
+ assertStringInstr("a", "", "UTF8_BINARY", 1);
+ assertStringInstr("a", "", "UTF8_LCASE", 1);
+ assertStringInstr("a", "", "UNICODE", 1);
+ assertStringInstr("a", "", "UNICODE_CI", 1);
+ assertStringInstr("", "x", "UTF8_BINARY", 0);
+ assertStringInstr("", "x", "UTF8_LCASE", 0);
+ assertStringInstr("", "x", "UNICODE", 0);
+ assertStringInstr("", "x", "UNICODE_CI", 0);
+ // Basic tests.
+ assertStringInstr("aaads", "aa", "UTF8_BINARY", 1);
+ assertStringInstr("aaads", "aa", "UTF8_LCASE", 1);
+ assertStringInstr("aaads", "aa", "UNICODE", 1);
+ assertStringInstr("aaads", "aa", "UNICODE_CI", 1);
assertStringInstr("aaads", "ds", "UTF8_BINARY", 4);
- assertStringInstr("xxxx", "", "UTF8_BINARY", 1);
- assertStringInstr("", "xxxx", "UTF8_BINARY", 0);
- assertStringInstr("test大千世界X大千世界", "大千", "UTF8_BINARY", 5);
- assertStringInstr("test大千世界X大千世界", "界X", "UTF8_BINARY", 8);
+ assertStringInstr("aaads", "ds", "UTF8_LCASE", 4);
+ assertStringInstr("aaads", "ds", "UNICODE", 4);
+ assertStringInstr("aaads", "ds", "UNICODE_CI", 4);
+ assertStringInstr("aaads", "Aa", "UTF8_BINARY", 0);
assertStringInstr("aaads", "Aa", "UTF8_LCASE", 1);
+ assertStringInstr("aaads", "Aa", "UNICODE", 0);
+ assertStringInstr("aaads", "Aa", "UNICODE_CI", 1);
+ assertStringInstr("aaaDs", "de", "UTF8_BINARY", 0);
assertStringInstr("aaaDs", "de", "UTF8_LCASE", 0);
+ assertStringInstr("aaaDs", "de", "UNICODE", 0);
+ assertStringInstr("aaaDs", "de", "UNICODE_CI", 0);
+ assertStringInstr("aaaDs", "ds", "UTF8_BINARY", 0);
assertStringInstr("aaaDs", "ds", "UTF8_LCASE", 4);
- assertStringInstr("xxxx", "", "UTF8_LCASE", 1);
- assertStringInstr("", "xxxx", "UTF8_LCASE", 0);
+ assertStringInstr("aaaDs", "ds", "UNICODE", 0);
+ assertStringInstr("aaaDs", "ds", "UNICODE_CI", 4);
+ assertStringInstr("aaadS", "Ds", "UTF8_BINARY", 0);
+ assertStringInstr("aaadS", "Ds", "UTF8_LCASE", 4);
+ assertStringInstr("aaadS", "Ds", "UNICODE", 0);
+ assertStringInstr("aaadS", "Ds", "UNICODE_CI", 4);
+ assertStringInstr("aaaČŠčšcs", "cs", "SR", 8);
+ assertStringInstr("aaaČŠčšcs", "cs", "SR_CI_AI", 4);
+ // Advanced tests.
+ assertStringInstr("test大千世界X大千世界", "大千", "UTF8_BINARY", 5);
assertStringInstr("test大千世界X大千世界", "大千", "UTF8_LCASE", 5);
+ assertStringInstr("test大千世界X大千世界", "大千", "UNICODE", 5);
+ assertStringInstr("test大千世界X大千世界", "大千", "UNICODE_CI", 5);
+ assertStringInstr("test大千世界X大千世界", "界X", "UTF8_BINARY", 8);
+ assertStringInstr("test大千世界X大千世界", "界X", "UTF8_LCASE", 8);
+ assertStringInstr("test大千世界X大千世界", "界X", "UNICODE", 8);
+ assertStringInstr("test大千世界X大千世界", "界X", "UNICODE_CI", 8);
+ assertStringInstr("test大千世界X大千世界", "界x", "UTF8_BINARY", 0);
assertStringInstr("test大千世界X大千世界", "界x", "UTF8_LCASE", 8);
- assertStringInstr("aaads", "Aa", "UNICODE", 0);
- assertStringInstr("aaads", "aa", "UNICODE", 1);
- assertStringInstr("aaads", "de", "UNICODE", 0);
- assertStringInstr("xxxx", "", "UNICODE", 1);
- assertStringInstr("", "xxxx", "UNICODE", 0);
assertStringInstr("test大千世界X大千世界", "界x", "UNICODE", 0);
- assertStringInstr("test大千世界X大千世界", "界X", "UNICODE", 8);
- assertStringInstr("xxxx", "", "UNICODE_CI", 1);
- assertStringInstr("", "xxxx", "UNICODE_CI", 0);
- assertStringInstr("aaads", "AD", "UNICODE_CI", 3);
- assertStringInstr("aaads", "dS", "UNICODE_CI", 4);
- assertStringInstr("test大千世界X大千世界", "界y", "UNICODE_CI", 0);
assertStringInstr("test大千世界X大千世界", "界x", "UNICODE_CI", 8);
- assertStringInstr("i̇", "i", "UNICODE_CI", 0);
- assertStringInstr("i̇", "\u0307", "UNICODE_CI", 0);
- assertStringInstr("i̇", "İ", "UNICODE_CI", 1);
+ assertStringInstr("test大千世界X大千世界", "界y", "UTF8_BINARY", 0);
+ assertStringInstr("test大千世界X大千世界", "界y", "UTF8_LCASE", 0);
+ assertStringInstr("test大千世界X大千世界", "界y", "UNICODE", 0);
+ assertStringInstr("test大千世界X大千世界", "界y", "UNICODE_CI", 0);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertStringInstr("i\u0307", "i", "UNICODE_CI", 0);
+ assertStringInstr("i\u0307", "\u0307", "UNICODE_CI", 0);
+ assertStringInstr("i\u0307", "İ", "UNICODE_CI", 1);
assertStringInstr("İ", "i", "UNICODE_CI", 0);
- assertStringInstr("İoi̇o12", "i̇o", "UNICODE_CI", 1);
+ assertStringInstr("İoi̇o12", "i\u0307o", "UNICODE_CI", 1);
assertStringInstr("i̇oİo12", "İo", "UNICODE_CI", 1);
- assertStringInstr("abİoi̇o", "i̇o", "UNICODE_CI", 3);
+ assertStringInstr("abİoi̇o", "i\u0307o", "UNICODE_CI", 3);
assertStringInstr("abi̇oİo", "İo", "UNICODE_CI", 3);
assertStringInstr("ai̇oxXİo", "Xx", "UNICODE_CI", 5);
assertStringInstr("aİoi̇oxx", "XX", "UNICODE_CI", 7);
- assertStringInstr("i̇", "i", "UTF8_LCASE", 1); // != UNICODE_CI
- assertStringInstr("i̇", "\u0307", "UTF8_LCASE", 2); // != UNICODE_CI
- assertStringInstr("i̇", "İ", "UTF8_LCASE", 1);
+ assertStringInstr("i\u0307", "i", "UTF8_LCASE", 1); // != UNICODE_CI
+ assertStringInstr("i\u0307", "\u0307", "UTF8_LCASE", 2); // != UNICODE_CI
+ assertStringInstr("i\u0307", "İ", "UTF8_LCASE", 1);
assertStringInstr("İ", "i", "UTF8_LCASE", 0);
- assertStringInstr("İoi̇o12", "i̇o", "UTF8_LCASE", 1);
+ assertStringInstr("İoi̇o12", "i\u0307o", "UTF8_LCASE", 1);
assertStringInstr("i̇oİo12", "İo", "UTF8_LCASE", 1);
- assertStringInstr("abİoi̇o", "i̇o", "UTF8_LCASE", 3);
+ assertStringInstr("abİoi̇o", "i\u0307o", "UTF8_LCASE", 3);
assertStringInstr("abi̇oİo", "İo", "UTF8_LCASE", 3);
assertStringInstr("abI\u0307oi̇o", "İo", "UTF8_LCASE", 3);
assertStringInstr("ai̇oxXİo", "Xx", "UTF8_LCASE", 5);
assertStringInstr("abİoi̇o", "\u0307o", "UTF8_LCASE", 6);
assertStringInstr("aİoi̇oxx", "XX", "UTF8_LCASE", 7);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertStringInstr("σ", "σ", "UTF8_BINARY", 1);
+ assertStringInstr("σ", "ς", "UTF8_BINARY", 0);
+ assertStringInstr("σ", "Σ", "UTF8_BINARY", 0);
+ assertStringInstr("ς", "σ", "UTF8_BINARY", 0);
+ assertStringInstr("ς", "ς", "UTF8_BINARY", 1);
+ assertStringInstr("ς", "Σ", "UTF8_BINARY", 0);
+ assertStringInstr("Σ", "σ", "UTF8_BINARY", 0);
+ assertStringInstr("Σ", "ς", "UTF8_BINARY", 0);
+ assertStringInstr("Σ", "Σ", "UTF8_BINARY", 1);
+ assertStringInstr("σ", "σ", "UTF8_LCASE", 1);
+ assertStringInstr("σ", "ς", "UTF8_LCASE", 1);
+ assertStringInstr("σ", "Σ", "UTF8_LCASE", 1);
+ assertStringInstr("ς", "σ", "UTF8_LCASE", 1);
+ assertStringInstr("ς", "ς", "UTF8_LCASE", 1);
+ assertStringInstr("ς", "Σ", "UTF8_LCASE", 1);
+ assertStringInstr("Σ", "σ", "UTF8_LCASE", 1);
+ assertStringInstr("Σ", "ς", "UTF8_LCASE", 1);
+ assertStringInstr("Σ", "Σ", "UTF8_LCASE", 1);
+ assertStringInstr("σ", "σ", "UNICODE", 1);
+ assertStringInstr("σ", "ς", "UNICODE", 0);
+ assertStringInstr("σ", "Σ", "UNICODE", 0);
+ assertStringInstr("ς", "σ", "UNICODE", 0);
+ assertStringInstr("ς", "ς", "UNICODE", 1);
+ assertStringInstr("ς", "Σ", "UNICODE", 0);
+ assertStringInstr("Σ", "σ", "UNICODE", 0);
+ assertStringInstr("Σ", "ς", "UNICODE", 0);
+ assertStringInstr("Σ", "Σ", "UNICODE", 1);
+ assertStringInstr("σ", "σ", "UNICODE_CI", 1);
+ assertStringInstr("σ", "ς", "UNICODE_CI", 1);
+ assertStringInstr("σ", "Σ", "UNICODE_CI", 1);
+ assertStringInstr("ς", "σ", "UNICODE_CI", 1);
+ assertStringInstr("ς", "ς", "UNICODE_CI", 1);
+ assertStringInstr("ς", "Σ", "UNICODE_CI", 1);
+ assertStringInstr("Σ", "σ", "UNICODE_CI", 1);
+ assertStringInstr("Σ", "ς", "UNICODE_CI", 1);
+ assertStringInstr("Σ", "Σ", "UNICODE_CI", 1);
+ // Surrogate pairs.
+ assertStringInstr("a🙃b", "a", "UTF8_BINARY", 1);
+ assertStringInstr("a🙃b", "a", "UTF8_LCASE", 1);
+ assertStringInstr("a🙃b", "a", "UNICODE", 1);
+ assertStringInstr("a🙃b", "a", "UNICODE_CI", 1);
+ assertStringInstr("a🙃b", "🙃", "UTF8_BINARY", 2);
+ assertStringInstr("a🙃b", "🙃", "UTF8_LCASE", 2);
+ assertStringInstr("a🙃b", "🙃", "UNICODE", 2);
+ assertStringInstr("a🙃b", "🙃", "UNICODE_CI", 2);
+ assertStringInstr("a🙃b", "b", "UTF8_BINARY", 3);
+ assertStringInstr("a🙃b", "b", "UTF8_LCASE", 3);
+ assertStringInstr("a🙃b", "b", "UNICODE", 3);
+ assertStringInstr("a🙃b", "b", "UNICODE_CI", 3);
+ assertStringInstr("a🙃🙃b", "🙃", "UTF8_BINARY", 2);
+ assertStringInstr("a🙃🙃b", "🙃", "UTF8_LCASE", 2);
+ assertStringInstr("a🙃🙃b", "🙃", "UNICODE", 2);
+ assertStringInstr("a🙃🙃b", "🙃", "UNICODE_CI", 2);
+ assertStringInstr("a🙃🙃b", "b", "UTF8_BINARY", 4);
+ assertStringInstr("a🙃🙃b", "b", "UTF8_LCASE", 4);
+ assertStringInstr("a🙃🙃b", "b", "UNICODE", 4);
+ assertStringInstr("a🙃🙃b", "b", "UNICODE_CI", 4);
+ assertStringInstr("a🙃x🙃b", "b", "UTF8_BINARY", 5);
+ assertStringInstr("a🙃x🙃b", "b", "UTF8_LCASE", 5);
+ assertStringInstr("a🙃x🙃b", "b", "UNICODE", 5);
+ assertStringInstr("a🙃x🙃b", "b", "UNICODE_CI", 5);
}
+ /**
+ * Verify the behaviour of the `FindInSet` collation support class.
+ */
+
private void assertFindInSet(String word, UTF8String set, String collationName,
- Integer expected) throws SparkException {
+ int expected) throws SparkException {
UTF8String w = UTF8String.fromString(word);
int collationId = CollationFactory.collationNameToId(collationName);
- assertEquals(expected, CollationSupport.FindInSet.exec(w, set, collationId));
+ int result = CollationSupport.FindInSet.exec(w, set, collationId);
+ assertEquals(expected, result);
}
@Test
public void testFindInSet() throws SparkException {
- assertFindInSet("AB", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
- assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 1);
- assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 5);
- assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
+ // Empty strings.
+ assertFindInSet("", UTF8String.fromString(""), "UTF8_BINARY", 1);
+ assertFindInSet("", UTF8String.fromString(""), "UTF8_LCASE", 1);
+ assertFindInSet("", UTF8String.fromString(""), "UNICODE", 1);
+ assertFindInSet("", UTF8String.fromString(""), "UNICODE_CI", 1);
assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
- assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_BINARY", 1);
- assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_BINARY", 6);
- assertFindInSet("", UTF8String.fromString("abc"), "UTF8_BINARY", 0);
- assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0);
- assertFindInSet("c", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 4);
- assertFindInSet("AB", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 3);
- assertFindInSet("AbC", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 1);
- assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0);
- assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0);
- assertFindInSet("XX", UTF8String.fromString("xx"), "UTF8_LCASE", 1);
assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0);
+ assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
+ assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0);
+ assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_BINARY", 1);
assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UTF8_LCASE", 1);
- assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_LCASE", 6);
- assertFindInSet("", UTF8String.fromString("abc"), "UTF8_LCASE", 0);
- assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_LCASE", 4);
- assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
- assertFindInSet("ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 3);
- assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
- assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE", 1);
+ assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE_CI", 1);
+ assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_BINARY", 6);
+ assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UTF8_LCASE", 6);
assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE", 6);
+ assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE_CI", 6);
+ assertFindInSet("", UTF8String.fromString("abc"), "UTF8_BINARY", 0);
+ assertFindInSet("", UTF8String.fromString("abc"), "UTF8_LCASE", 0);
assertFindInSet("", UTF8String.fromString("abc"), "UNICODE", 0);
+ assertFindInSet("", UTF8String.fromString("abc"), "UNICODE_CI", 0);
+ // Basic tests.
+ assertFindInSet("xx", UTF8String.fromString("xx"), "UTF8_BINARY", 1);
+ assertFindInSet("xx", UTF8String.fromString("xx"), "UTF8_LCASE", 1);
assertFindInSet("xx", UTF8String.fromString("xx"), "UNICODE", 1);
- assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 0);
- assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 5);
+ assertFindInSet("xx", UTF8String.fromString("xx"), "UNICODE_CI", 1);
+ assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
+ assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0);
+ assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
assertFindInSet("a", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0);
+ assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 1);
+ assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 1);
+ assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 1);
+ assertFindInSet("abc", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 1);
+ assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
+ assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0);
+ assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
+ assertFindInSet("abcd", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0);
+ assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 5);
+ assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 5);
+ assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 5);
+ assertFindInSet("def", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 5);
+ assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
+ assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0);
+ assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
+ assertFindInSet("xyz", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0);
+ assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
+ assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 3);
+ assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
+ assertFindInSet("Ab", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 3);
+ assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
+ assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 0);
+ assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
+ assertFindInSet("d,ef", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0);
+ assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_BINARY", 0);
+ assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UTF8_LCASE", 4);
+ assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE", 0);
assertFindInSet("C", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 4);
- assertFindInSet("DeF", UTF8String.fromString("abc,b,ab,c,dEf"), "UNICODE_CI", 5);
- assertFindInSet("DEFG", UTF8String.fromString("abc,b,ab,c,def"), "UNICODE_CI", 0);
- assertFindInSet("", UTF8String.fromString(",abc,b,ab,c,def"), "UNICODE_CI", 1);
- assertFindInSet("", UTF8String.fromString("abc,b,ab,c,def,"), "UNICODE_CI", 6);
- assertFindInSet("", UTF8String.fromString("abc"), "UNICODE_CI", 0);
- assertFindInSet("XX", UTF8String.fromString("xx"), "UNICODE_CI", 1);
+ // Advanced tests.
+ assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_BINARY", 5);
+ assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_LCASE", 5);
+ assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 5);
+ assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 5);
+ assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_BINARY", 0);
+ assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UTF8_LCASE", 4);
+ assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE", 0);
assertFindInSet("界x", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 4);
+ assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UTF8_BINARY", 0);
+ assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UTF8_LCASE", 5);
+ assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UNICODE", 0);
assertFindInSet("界x", UTF8String.fromString("test,大千,界Xx,世,界X,大,千,世界"), "UNICODE_CI", 5);
- assertFindInSet("大", UTF8String.fromString("test,大千,世,界X,大,千,世界"), "UNICODE_CI", 5);
- assertFindInSet("i̇", UTF8String.fromString("İ"), "UNICODE_CI", 1);
- assertFindInSet("i", UTF8String.fromString("İ"), "UNICODE_CI", 0);
- assertFindInSet("i̇", UTF8String.fromString("i̇"), "UNICODE_CI", 1);
- assertFindInSet("i", UTF8String.fromString("i̇"), "UNICODE_CI", 0);
- assertFindInSet("i̇", UTF8String.fromString("İ,"), "UNICODE_CI", 1);
- assertFindInSet("i", UTF8String.fromString("İ,"), "UNICODE_CI", 0);
- assertFindInSet("i̇", UTF8String.fromString("i̇,"), "UNICODE_CI", 1);
- assertFindInSet("i", UTF8String.fromString("i̇,"), "UNICODE_CI", 0);
- assertFindInSet("i̇", UTF8String.fromString("ab,İ"), "UNICODE_CI", 2);
- assertFindInSet("i", UTF8String.fromString("ab,İ"), "UNICODE_CI", 0);
- assertFindInSet("i̇", UTF8String.fromString("ab,i̇"), "UNICODE_CI", 2);
- assertFindInSet("i", UTF8String.fromString("ab,i̇"), "UNICODE_CI", 0);
- assertFindInSet("i̇", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 2);
- assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 0);
- assertFindInSet("i̇", UTF8String.fromString("ab,i̇,12"), "UNICODE_CI", 2);
- assertFindInSet("i", UTF8String.fromString("ab,i̇,12"), "UNICODE_CI", 0);
- assertFindInSet("i̇o", UTF8String.fromString("ab,İo,12"), "UNICODE_CI", 2);
- assertFindInSet("İo", UTF8String.fromString("ab,i̇o,12"), "UNICODE_CI", 2);
- assertFindInSet("i̇", UTF8String.fromString("İ"), "UTF8_LCASE", 1);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UTF8_BINARY", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UTF8_LCASE", 1);
+ assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UNICODE", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("İ"), "UNICODE_CI", 1);
+ assertFindInSet("i", UTF8String.fromString("İ"), "UTF8_BINARY", 0);
assertFindInSet("i", UTF8String.fromString("İ"), "UTF8_LCASE", 0);
- assertFindInSet("i̇", UTF8String.fromString("i̇"), "UTF8_LCASE", 1);
- assertFindInSet("i", UTF8String.fromString("i̇"), "UTF8_LCASE", 0);
- assertFindInSet("i̇", UTF8String.fromString("İ,"), "UTF8_LCASE", 1);
+ assertFindInSet("i", UTF8String.fromString("İ"), "UNICODE", 0);
+ assertFindInSet("i", UTF8String.fromString("İ"), "UNICODE_CI", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UTF8_BINARY", 1);
+ assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UTF8_LCASE", 1);
+ assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UNICODE", 1);
+ assertFindInSet("i\u0307", UTF8String.fromString("i\u0307"), "UNICODE_CI", 1);
+ assertFindInSet("i", UTF8String.fromString("i\u0307"), "UTF8_BINARY", 0);
+ assertFindInSet("i", UTF8String.fromString("i\u0307"), "UTF8_LCASE", 0);
+ assertFindInSet("i", UTF8String.fromString("i\u0307"), "UNICODE", 0);
+ assertFindInSet("i", UTF8String.fromString("i\u0307"), "UNICODE_CI", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UTF8_BINARY", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UTF8_LCASE", 1);
+ assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UNICODE", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("İ,"), "UNICODE_CI", 1);
+ assertFindInSet("i", UTF8String.fromString("İ,"), "UTF8_BINARY", 0);
assertFindInSet("i", UTF8String.fromString("İ,"), "UTF8_LCASE", 0);
- assertFindInSet("i̇", UTF8String.fromString("i̇,"), "UTF8_LCASE", 1);
- assertFindInSet("i", UTF8String.fromString("i̇,"), "UTF8_LCASE", 0);
- assertFindInSet("i̇", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 2);
+ assertFindInSet("i", UTF8String.fromString("İ,"), "UNICODE", 0);
+ assertFindInSet("i", UTF8String.fromString("İ,"), "UNICODE_CI", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UTF8_BINARY", 1);
+ assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UTF8_LCASE", 1);
+ assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UNICODE", 1);
+ assertFindInSet("i\u0307", UTF8String.fromString("i\u0307,"), "UNICODE_CI", 1);
+ assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UTF8_BINARY", 0);
+ assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UTF8_LCASE", 0);
+ assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UNICODE", 0);
+ assertFindInSet("i", UTF8String.fromString("i\u0307,"), "UNICODE_CI", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UTF8_BINARY", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UNICODE", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,İ"), "UNICODE_CI", 2);
+ assertFindInSet("i", UTF8String.fromString("ab,İ"), "UTF8_BINARY", 0);
assertFindInSet("i", UTF8String.fromString("ab,İ"), "UTF8_LCASE", 0);
- assertFindInSet("i̇", UTF8String.fromString("ab,i̇"), "UTF8_LCASE", 2);
- assertFindInSet("i", UTF8String.fromString("ab,i̇"), "UTF8_LCASE", 0);
- assertFindInSet("i̇", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 2);
+ assertFindInSet("i", UTF8String.fromString("ab,İ"), "UNICODE", 0);
+ assertFindInSet("i", UTF8String.fromString("ab,İ"), "UNICODE_CI", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UNICODE", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 2);
+ assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 0);
+ assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 0);
+ assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UNICODE", 0);
+ assertFindInSet("i", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 0);
+ assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UTF8_BINARY", 0);
+ assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UTF8_LCASE", 2);
+ assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UNICODE", 0);
+ assertFindInSet("İ", UTF8String.fromString("ab,i\u0307"), "UNICODE_CI", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UTF8_BINARY", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UNICODE", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 2);
+ assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UTF8_BINARY", 0);
assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UTF8_LCASE", 0);
- assertFindInSet("i̇", UTF8String.fromString("ab,i̇,12"), "UTF8_LCASE", 2);
- assertFindInSet("i", UTF8String.fromString("ab,i̇,12"), "UTF8_LCASE", 0);
- assertFindInSet("i̇o", UTF8String.fromString("ab,İo,12"), "UTF8_LCASE", 2);
- assertFindInSet("İo", UTF8String.fromString("ab,i̇o,12"), "UTF8_LCASE", 2);
+ assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UNICODE", 0);
+ assertFindInSet("i", UTF8String.fromString("ab,İ,12"), "UNICODE_CI", 0);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UTF8_BINARY", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UTF8_LCASE", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UNICODE", 2);
+ assertFindInSet("i\u0307", UTF8String.fromString("ab,i\u0307,12"), "UNICODE_CI", 2);
+ assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UTF8_BINARY", 0);
+ assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UTF8_LCASE", 0);
+ assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UNICODE", 0);
+ assertFindInSet("i", UTF8String.fromString("ab,i\u0307,12"), "UNICODE_CI", 0);
+ assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UTF8_BINARY", 0);
+ assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UTF8_LCASE", 2);
+ assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UNICODE", 0);
+ assertFindInSet("i\u0307o", UTF8String.fromString("ab,İo,12"), "UNICODE_CI", 2);
+ assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UTF8_BINARY", 0);
+ assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UTF8_LCASE", 2);
+ assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UNICODE", 0);
+ assertFindInSet("İo", UTF8String.fromString("ab,i\u0307o,12"), "UNICODE_CI", 2);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertFindInSet("σ", UTF8String.fromString("σ"), "UTF8_BINARY", 1);
+ assertFindInSet("σ", UTF8String.fromString("ς"), "UTF8_BINARY", 0);
+ assertFindInSet("σ", UTF8String.fromString("Σ"), "UTF8_BINARY", 0);
+ assertFindInSet("ς", UTF8String.fromString("σ"), "UTF8_BINARY", 0);
+ assertFindInSet("ς", UTF8String.fromString("ς"), "UTF8_BINARY", 1);
+ assertFindInSet("ς", UTF8String.fromString("Σ"), "UTF8_BINARY", 0);
+ assertFindInSet("Σ", UTF8String.fromString("σ"), "UTF8_BINARY", 0);
+ assertFindInSet("Σ", UTF8String.fromString("ς"), "UTF8_BINARY", 0);
+ assertFindInSet("Σ", UTF8String.fromString("Σ"), "UTF8_BINARY", 1);
+ assertFindInSet("σ", UTF8String.fromString("σ"), "UTF8_LCASE", 1);
+ assertFindInSet("σ", UTF8String.fromString("ς"), "UTF8_LCASE", 1);
+ assertFindInSet("σ", UTF8String.fromString("Σ"), "UTF8_LCASE", 1);
+ assertFindInSet("ς", UTF8String.fromString("σ"), "UTF8_LCASE", 1);
+ assertFindInSet("ς", UTF8String.fromString("ς"), "UTF8_LCASE", 1);
+ assertFindInSet("ς", UTF8String.fromString("Σ"), "UTF8_LCASE", 1);
+ assertFindInSet("Σ", UTF8String.fromString("σ"), "UTF8_LCASE", 1);
+ assertFindInSet("Σ", UTF8String.fromString("ς"), "UTF8_LCASE", 1);
+ assertFindInSet("Σ", UTF8String.fromString("Σ"), "UTF8_LCASE", 1);
+ assertFindInSet("σ", UTF8String.fromString("σ"), "UNICODE", 1);
+ assertFindInSet("σ", UTF8String.fromString("ς"), "UNICODE", 0);
+ assertFindInSet("σ", UTF8String.fromString("Σ"), "UNICODE", 0);
+ assertFindInSet("ς", UTF8String.fromString("σ"), "UNICODE", 0);
+ assertFindInSet("ς", UTF8String.fromString("ς"), "UNICODE", 1);
+ assertFindInSet("ς", UTF8String.fromString("Σ"), "UNICODE", 0);
+ assertFindInSet("Σ", UTF8String.fromString("σ"), "UNICODE", 0);
+ assertFindInSet("Σ", UTF8String.fromString("ς"), "UNICODE", 0);
+ assertFindInSet("Σ", UTF8String.fromString("Σ"), "UNICODE", 1);
+ assertFindInSet("σ", UTF8String.fromString("σ"), "UNICODE_CI", 1);
+ assertFindInSet("σ", UTF8String.fromString("ς"), "UNICODE_CI", 1);
+ assertFindInSet("σ", UTF8String.fromString("Σ"), "UNICODE_CI", 1);
+ assertFindInSet("ς", UTF8String.fromString("σ"), "UNICODE_CI", 1);
+ assertFindInSet("ς", UTF8String.fromString("ς"), "UNICODE_CI", 1);
+ assertFindInSet("ς", UTF8String.fromString("Σ"), "UNICODE_CI", 1);
+ assertFindInSet("Σ", UTF8String.fromString("σ"), "UNICODE_CI", 1);
+ assertFindInSet("Σ", UTF8String.fromString("ς"), "UNICODE_CI", 1);
+ assertFindInSet("Σ", UTF8String.fromString("Σ"), "UNICODE_CI", 1);
+ // Surrogate pairs.
+ assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 0);
+ assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 0);
+ assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 0);
+ assertFindInSet("a", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 0);
+ assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 1);
+ assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 1);
+ assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 1);
+ assertFindInSet("a🙃", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 1);
+ assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 2);
+ assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 2);
+ assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 2);
+ assertFindInSet("b", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 2);
+ assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_BINARY", 3);
+ assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UTF8_LCASE", 3);
+ assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE", 3);
+ assertFindInSet("🙃c", UTF8String.fromString("a🙃,b,🙃c"), "UNICODE_CI", 3);
+ assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 0);
+ assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 0);
+ assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 0);
+ assertFindInSet("😄😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 0);
+ assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 1);
+ assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 1);
+ assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 1);
+ assertFindInSet("😀😆", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 1);
+ assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UTF8_BINARY", 2);
+ assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UTF8_LCASE", 2);
+ assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UNICODE", 2);
+ assertFindInSet("😃😄", UTF8String.fromString("😀😆,😃😄"), "UNICODE_CI", 2);
+ assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0);
+ assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 0);
+ assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0);
+ assertFindInSet("x", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 0);
+ assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 1);
+ assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 1);
+ assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 1);
+ assertFindInSet("a", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1);
+ assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0);
+ assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 1);
+ assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0);
+ assertFindInSet("A", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1);
+ assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 3);
+ assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 3);
+ assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 3);
+ assertFindInSet("𝔸", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 1);
+ assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 2);
+ assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 2);
+ assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 2);
+ assertFindInSet("𐐅", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 2);
+ assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_BINARY", 0);
+ assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UTF8_LCASE", 2);
+ assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE", 0);
+ assertFindInSet("𐐭", UTF8String.fromString("a,𐐅,𝔸"), "UNICODE_CI", 2);
// Invalid UTF8 strings
assertFindInSet("C", UTF8String.fromBytes(
new byte[] { 0x41, (byte) 0xC2, 0x2C, 0x42, 0x2C, 0x43, 0x2C, 0x43, 0x2C, 0x56 }),
@@ -983,147 +1969,581 @@ public void testFindInSet() throws SparkException {
"UNICODE_CI", 2);
}
- private void assertReplace(String source, String search, String replace, String collationName,
- String expected) throws SparkException {
- UTF8String src = UTF8String.fromString(source);
+ /**
+ * Verify the behaviour of the `StringReplace` collation support class.
+ */
+
+ private void assertStringReplace(String source, String search, String replace,
+ String collationName, String expected) throws SparkException {
+ UTF8String src = UTF8String.fromString(source);
UTF8String sear = UTF8String.fromString(search);
UTF8String repl = UTF8String.fromString(replace);
int collationId = CollationFactory.collationNameToId(collationName);
- assertEquals(expected, CollationSupport.StringReplace
- .exec(src, sear, repl, collationId).toString());
+ UTF8String result = CollationSupport.StringReplace.exec(src, sear, repl, collationId);
+ assertEquals(UTF8String.fromString(expected), result);
}
@Test
- public void testReplace() throws SparkException {
- assertReplace("r世eplace", "pl", "123", "UTF8_BINARY", "r世e123ace");
- assertReplace("replace", "pl", "", "UTF8_BINARY", "reace");
- assertReplace("repl世ace", "Pl", "", "UTF8_BINARY", "repl世ace");
- assertReplace("replace", "", "123", "UTF8_BINARY", "replace");
- assertReplace("abcabc", "b", "12", "UTF8_BINARY", "a12ca12c");
- assertReplace("abcdabcd", "bc", "", "UTF8_BINARY", "adad");
- assertReplace("r世eplace", "pl", "xx", "UTF8_LCASE", "r世exxace");
- assertReplace("repl世ace", "PL", "AB", "UTF8_LCASE", "reAB世ace");
- assertReplace("Replace", "", "123", "UTF8_LCASE", "Replace");
- assertReplace("re世place", "世", "x", "UTF8_LCASE", "rexplace");
- assertReplace("abcaBc", "B", "12", "UTF8_LCASE", "a12ca12c");
- assertReplace("AbcdabCd", "Bc", "", "UTF8_LCASE", "Adad");
- assertReplace("re世place", "plx", "123", "UNICODE", "re世place");
- assertReplace("世Replace", "re", "", "UNICODE", "世Replace");
- assertReplace("replace世", "", "123", "UNICODE", "replace世");
- assertReplace("aBc世abc", "b", "12", "UNICODE", "aBc世a12c");
- assertReplace("abcdabcd", "bc", "", "UNICODE", "adad");
- assertReplace("replace", "plx", "123", "UNICODE_CI", "replace");
- assertReplace("Replace", "re", "", "UNICODE_CI", "place");
- assertReplace("replace", "", "123", "UNICODE_CI", "replace");
- assertReplace("aBc世abc", "b", "12", "UNICODE_CI", "a12c世a12c");
- assertReplace("a世Bcdabcd", "bC", "", "UNICODE_CI", "a世dad");
- assertReplace("abi̇12", "i", "X", "UNICODE_CI", "abi̇12");
- assertReplace("abi̇12", "\u0307", "X", "UNICODE_CI", "abi̇12");
- assertReplace("abi̇12", "İ", "X", "UNICODE_CI", "abX12");
- assertReplace("abİ12", "i", "X", "UNICODE_CI", "abİ12");
- assertReplace("İi̇İi̇İi̇", "i̇", "x", "UNICODE_CI", "xxxxxx");
- assertReplace("İi̇İi̇İi̇", "i", "x", "UNICODE_CI", "İi̇İi̇İi̇");
- assertReplace("abİo12i̇o", "i̇o", "xx", "UNICODE_CI", "abxx12xx");
- assertReplace("abi̇o12i̇o", "İo", "yy", "UNICODE_CI", "abyy12yy");
- assertReplace("abi̇12", "i", "X", "UTF8_LCASE", "abX\u030712"); // != UNICODE_CI
- assertReplace("abi̇12", "\u0307", "X", "UTF8_LCASE", "abiX12"); // != UNICODE_CI
- assertReplace("abi̇12", "İ", "X", "UTF8_LCASE", "abX12");
- assertReplace("abİ12", "i", "X", "UTF8_LCASE", "abİ12");
- assertReplace("İi̇İi̇İi̇", "i̇", "x", "UTF8_LCASE", "xxxxxx");
- assertReplace("İi̇İi̇İi̇", "i", "x", "UTF8_LCASE",
+ public void testStringReplace() throws SparkException {
+ // Empty strings.
+ assertStringReplace("", "", "", "UTF8_BINARY", "");
+ assertStringReplace("", "", "", "UTF8_LCASE", "");
+ assertStringReplace("", "", "", "UNICODE", "");
+ assertStringReplace("", "", "", "UNICODE_CI", "");
+ assertStringReplace("abc", "", "", "UTF8_BINARY", "abc");
+ assertStringReplace("abc", "", "", "UTF8_LCASE", "abc");
+ assertStringReplace("abc", "", "", "UNICODE", "abc");
+ assertStringReplace("abc", "", "", "UNICODE_CI", "abc");
+ assertStringReplace("", "x", "", "UTF8_BINARY", "");
+ assertStringReplace("", "x", "", "UTF8_LCASE", "");
+ assertStringReplace("", "x", "", "UNICODE", "");
+ assertStringReplace("", "x", "", "UNICODE_CI", "");
+ assertStringReplace("", "", "x", "UTF8_BINARY", "");
+ assertStringReplace("", "", "x", "UTF8_LCASE", "");
+ assertStringReplace("", "", "x", "UNICODE", "");
+ assertStringReplace("", "", "x", "UNICODE_CI", "");
+ assertStringReplace("", "b", "x", "UTF8_BINARY", "");
+ assertStringReplace("", "b", "x", "UTF8_LCASE", "");
+ assertStringReplace("", "b", "x", "UNICODE", "");
+ assertStringReplace("", "b", "x", "UNICODE_CI", "");
+ assertStringReplace("abc", "b", "", "UTF8_BINARY", "ac");
+ assertStringReplace("abc", "b", "", "UTF8_LCASE", "ac");
+ assertStringReplace("abc", "b", "", "UNICODE", "ac");
+ assertStringReplace("abc", "b", "", "UNICODE_CI", "ac");
+ assertStringReplace("abc", "", "x", "UTF8_BINARY", "abc");
+ assertStringReplace("abc", "", "x", "UTF8_LCASE", "abc");
+ assertStringReplace("abc", "", "x", "UNICODE", "abc");
+ assertStringReplace("abc", "", "x", "UNICODE_CI", "abc");
+ // Basic tests.
+ assertStringReplace("replace", "pl", "", "UTF8_BINARY", "reace");
+ assertStringReplace("replace", "pl", "", "UTF8_LCASE", "reace");
+ assertStringReplace("replace", "pl", "", "UNICODE", "reace");
+ assertStringReplace("replace", "pl", "", "UNICODE_CI", "reace");
+ assertStringReplace("replace", "", "123", "UTF8_BINARY", "replace");
+ assertStringReplace("replace", "", "123", "UTF8_LCASE", "replace");
+ assertStringReplace("replace", "", "123", "UNICODE", "replace");
+ assertStringReplace("replace", "", "123", "UNICODE_CI", "replace");
+ assertStringReplace("abcabc", "b", "12", "UTF8_BINARY", "a12ca12c");
+ assertStringReplace("abcabc", "b", "12", "UTF8_LCASE", "a12ca12c");
+ assertStringReplace("abcabc", "b", "12", "UNICODE", "a12ca12c");
+ assertStringReplace("abcabc", "b", "12", "UNICODE_CI", "a12ca12c");
+ assertStringReplace("replace", "plx", "123", "UTF8_BINARY", "replace");
+ assertStringReplace("replace", "plx", "123", "UTF8_LCASE", "replace");
+ assertStringReplace("replace", "plx", "123", "UNICODE", "replace");
+ assertStringReplace("replace", "plx", "123", "UNICODE_CI", "replace");
+ assertStringReplace("Replace", "re", "", "UTF8_BINARY", "Replace");
+ assertStringReplace("Replace", "re", "", "UTF8_LCASE", "place");
+ assertStringReplace("Replace", "re", "", "UNICODE", "Replace");
+ assertStringReplace("Replace", "re", "", "UNICODE_CI", "place");
+ assertStringReplace("abcdabcd", "Bc", "", "UTF8_BINARY", "abcdabcd");
+ assertStringReplace("abcdabcd", "Bc", "", "UTF8_LCASE", "adad");
+ assertStringReplace("abcdabcd", "Bc", "", "UNICODE", "abcdabcd");
+ assertStringReplace("abcdabcd", "Bc", "", "UNICODE_CI", "adad");
+ assertStringReplace("AbcdabCd", "Bc", "", "UTF8_BINARY", "AbcdabCd");
+ assertStringReplace("AbcdabCd", "Bc", "", "UTF8_LCASE", "Adad");
+ assertStringReplace("AbcdabCd", "Bc", "", "UNICODE", "AbcdabCd");
+ assertStringReplace("AbcdabCd", "Bc", "", "UNICODE_CI", "Adad");
+ // Advanced tests.
+ assertStringReplace("abcdabcd", "bc", "", "UTF8_BINARY", "adad");
+ assertStringReplace("r世eplace", "pl", "123", "UTF8_BINARY", "r世e123ace");
+ assertStringReplace("世Replace", "re", "", "UTF8_BINARY", "世Replace");
+ assertStringReplace("r世eplace", "pl", "xx", "UTF8_LCASE", "r世exxace");
+ assertStringReplace("repl世ace", "PL", "AB", "UTF8_LCASE", "reAB世ace");
+ assertStringReplace("re世place", "世", "x", "UTF8_LCASE", "rexplace");
+ assertStringReplace("re世place", "plx", "123", "UNICODE", "re世place");
+ assertStringReplace("replace世", "", "123", "UNICODE", "replace世");
+ assertStringReplace("aBc世abc", "b", "12", "UNICODE", "aBc世a12c");
+ assertStringReplace("aBc世abc", "b", "12", "UNICODE_CI", "a12c世a12c");
+ assertStringReplace("a世Bcdabcd", "bC", "", "UNICODE_CI", "a世dad");
+ assertStringReplace("repl世ace", "Pl", "", "UNICODE_CI", "re世ace");
+ assertStringReplace("abcčšdabĆŠscd", "cs", "", "SR_CI_AI", "abcdabscd");
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertStringReplace("abi̇12", "i", "X", "UNICODE_CI", "abi̇12");
+ assertStringReplace("abi̇12", "\u0307", "X", "UNICODE_CI", "abi̇12");
+ assertStringReplace("abi̇12", "İ", "X", "UNICODE_CI", "abX12");
+ assertStringReplace("abİ12", "i", "X", "UNICODE_CI", "abİ12");
+ assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", "UNICODE_CI", "xxxxxx");
+ assertStringReplace("İi̇İi̇İi̇", "i", "x", "UNICODE_CI", "İi̇İi̇İi̇");
+ assertStringReplace("abİo12i̇o", "i\u0307o", "xx", "UNICODE_CI", "abxx12xx");
+ assertStringReplace("abi̇o12i̇o", "İo", "yy", "UNICODE_CI", "abyy12yy");
+ assertStringReplace("abi̇12", "i", "X", "UTF8_LCASE", "abX\u030712"); // != UNICODE_CI
+ assertStringReplace("abi̇12", "\u0307", "X", "UTF8_LCASE", "abiX12"); // != UNICODE_CI
+ assertStringReplace("abi̇12", "İ", "X", "UTF8_LCASE", "abX12");
+ assertStringReplace("abİ12", "i", "X", "UTF8_LCASE", "abİ12");
+ assertStringReplace("İi̇İi̇İi̇", "i\u0307", "x", "UTF8_LCASE", "xxxxxx");
+ assertStringReplace("İi̇İi̇İi̇", "i", "x", "UTF8_LCASE",
"İx\u0307İx\u0307İx\u0307"); // != UNICODE_CI
- assertReplace("abİo12i̇o", "i̇o", "xx", "UTF8_LCASE", "abxx12xx");
- assertReplace("abi̇o12i̇o", "İo", "yy", "UTF8_LCASE", "abyy12yy");
+ assertStringReplace("abİo12i̇o", "i\u0307o", "xx", "UTF8_LCASE", "abxx12xx");
+ assertStringReplace("abi̇o12i̇o", "İo", "yy", "UTF8_LCASE", "abyy12yy");
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertStringReplace("σ", "σ", "x", "UTF8_BINARY", "x");
+ assertStringReplace("σ", "ς", "x", "UTF8_BINARY", "σ");
+ assertStringReplace("σ", "Σ", "x", "UTF8_BINARY", "σ");
+ assertStringReplace("ς", "σ", "x", "UTF8_BINARY", "ς");
+ assertStringReplace("ς", "ς", "x", "UTF8_BINARY", "x");
+ assertStringReplace("ς", "Σ", "x", "UTF8_BINARY", "ς");
+ assertStringReplace("Σ", "σ", "x", "UTF8_BINARY", "Σ");
+ assertStringReplace("Σ", "ς", "x", "UTF8_BINARY", "Σ");
+ assertStringReplace("Σ", "Σ", "x", "UTF8_BINARY", "x");
+ assertStringReplace("σ", "σ", "x", "UTF8_LCASE", "x");
+ assertStringReplace("σ", "ς", "x", "UTF8_LCASE", "x");
+ assertStringReplace("σ", "Σ", "x", "UTF8_LCASE", "x");
+ assertStringReplace("ς", "σ", "x", "UTF8_LCASE", "x");
+ assertStringReplace("ς", "ς", "x", "UTF8_LCASE", "x");
+ assertStringReplace("ς", "Σ", "x", "UTF8_LCASE", "x");
+ assertStringReplace("Σ", "σ", "x", "UTF8_LCASE", "x");
+ assertStringReplace("Σ", "ς", "x", "UTF8_LCASE", "x");
+ assertStringReplace("Σ", "Σ", "x", "UTF8_LCASE", "x");
+ assertStringReplace("σ", "σ", "x", "UNICODE", "x");
+ assertStringReplace("σ", "ς", "x", "UNICODE", "σ");
+ assertStringReplace("σ", "Σ", "x", "UNICODE", "σ");
+ assertStringReplace("ς", "σ", "x", "UNICODE", "ς");
+ assertStringReplace("ς", "ς", "x", "UNICODE", "x");
+ assertStringReplace("ς", "Σ", "x", "UNICODE", "ς");
+ assertStringReplace("Σ", "σ", "x", "UNICODE", "Σ");
+ assertStringReplace("Σ", "ς", "x", "UNICODE", "Σ");
+ assertStringReplace("Σ", "Σ", "x", "UNICODE", "x");
+ assertStringReplace("σ", "σ", "x", "UNICODE_CI", "x");
+ assertStringReplace("σ", "ς", "x", "UNICODE_CI", "x");
+ assertStringReplace("σ", "Σ", "x", "UNICODE_CI", "x");
+ assertStringReplace("ς", "σ", "x", "UNICODE_CI", "x");
+ assertStringReplace("ς", "ς", "x", "UNICODE_CI", "x");
+ assertStringReplace("ς", "Σ", "x", "UNICODE_CI", "x");
+ assertStringReplace("Σ", "σ", "x", "UNICODE_CI", "x");
+ assertStringReplace("Σ", "ς", "x", "UNICODE_CI", "x");
+ assertStringReplace("Σ", "Σ", "x", "UNICODE_CI", "x");
+ // Surrogate pairs.
+ assertStringReplace("a🙃b", "a", "x", "UTF8_BINARY", "x🙃b");
+ assertStringReplace("a🙃b", "b", "x", "UTF8_BINARY", "a🙃x");
+ assertStringReplace("a🙃b", "🙃", "x", "UTF8_BINARY", "axb");
+ assertStringReplace("a🙃b", "b", "c", "UTF8_LCASE", "a🙃c");
+ assertStringReplace("a🙃b", "b", "x", "UTF8_LCASE", "a🙃x");
+ assertStringReplace("a🙃b", "🙃", "x", "UTF8_LCASE", "axb");
+ assertStringReplace("a🙃b", "b", "c", "UNICODE", "a🙃c");
+ assertStringReplace("a🙃b", "b", "x", "UNICODE", "a🙃x");
+ assertStringReplace("a🙃b", "🙃", "x", "UNICODE", "axb");
+ assertStringReplace("a🙃b", "b", "c", "UNICODE_CI", "a🙃c");
+ assertStringReplace("a🙃b", "b", "x", "UNICODE_CI", "a🙃x");
+ assertStringReplace("a🙃b", "🙃", "x", "UNICODE_CI", "axb");
}
- private void assertLocate(String substring, String string, Integer start, String collationName,
- Integer expected) throws SparkException {
+ /**
+ * Verify the behaviour of the `StringLocate` collation support class.
+ */
+
+ private void assertStringLocate(String substring, String string, int start,
+ String collationName, int expected) throws SparkException {
+ // Note: When using start < 1, be careful to understand the behavior of the `indexOf`
+ // method and the implications of using `indexOf` in the `StringLocate` case class.
UTF8String substr = UTF8String.fromString(substring);
UTF8String str = UTF8String.fromString(string);
int collationId = CollationFactory.collationNameToId(collationName);
- assertEquals(expected, CollationSupport.StringLocate.exec(str, substr,
- start - 1, collationId) + 1);
+ int result = CollationSupport.StringLocate.exec(str, substr, start - 1, collationId) + 1;
+ assertEquals(expected, result);
}
@Test
- public void testLocate() throws SparkException {
- // If you add tests with start < 1 be careful to understand the behavior of the indexOf method
- // and usage of indexOf in the StringLocate class.
- assertLocate("aa", "aaads", 1, "UTF8_BINARY", 1);
- assertLocate("aa", "aaads", 2, "UTF8_BINARY", 2);
- assertLocate("aa", "aaads", 3, "UTF8_BINARY", 0);
- assertLocate("Aa", "aaads", 1, "UTF8_BINARY", 0);
- assertLocate("Aa", "aAads", 1, "UTF8_BINARY", 2);
- assertLocate("界x", "test大千世界X大千世界", 1, "UTF8_BINARY", 0);
- assertLocate("界X", "test大千世界X大千世界", 1, "UTF8_BINARY", 8);
- assertLocate("界", "test大千世界X大千世界", 13, "UTF8_BINARY", 13);
- assertLocate("AA", "aaads", 1, "UTF8_LCASE", 1);
- assertLocate("aa", "aAads", 2, "UTF8_LCASE", 2);
- assertLocate("aa", "aaAds", 3, "UTF8_LCASE", 0);
- assertLocate("abC", "abcabc", 1, "UTF8_LCASE", 1);
- assertLocate("abC", "abCabc", 2, "UTF8_LCASE", 4);
- assertLocate("abc", "abcabc", 4, "UTF8_LCASE", 4);
- assertLocate("界x", "test大千世界X大千世界", 1, "UTF8_LCASE", 8);
- assertLocate("界X", "test大千世界Xtest大千世界", 1, "UTF8_LCASE", 8);
- assertLocate("界", "test大千世界X大千世界", 13, "UTF8_LCASE", 13);
- assertLocate("大千", "test大千世界大千世界", 1, "UTF8_LCASE", 5);
- assertLocate("大千", "test大千世界大千世界", 9, "UTF8_LCASE", 9);
- assertLocate("大千", "大千世界大千世界", 1, "UTF8_LCASE", 1);
- assertLocate("aa", "Aaads", 1, "UNICODE", 2);
- assertLocate("AA", "aaads", 1, "UNICODE", 0);
- assertLocate("aa", "aAads", 2, "UNICODE", 0);
- assertLocate("aa", "aaAds", 3, "UNICODE", 0);
- assertLocate("abC", "abcabc", 1, "UNICODE", 0);
- assertLocate("abC", "abCabc", 2, "UNICODE", 0);
- assertLocate("abC", "abCabC", 2, "UNICODE", 4);
- assertLocate("abc", "abcabc", 1, "UNICODE", 1);
- assertLocate("abc", "abcabc", 3, "UNICODE", 4);
- assertLocate("界x", "test大千世界X大千世界", 1, "UNICODE", 0);
- assertLocate("界X", "test大千世界X大千世界", 1, "UNICODE", 8);
- assertLocate("界", "test大千世界X大千世界", 13, "UNICODE", 13);
- assertLocate("AA", "aaads", 1, "UNICODE_CI", 1);
- assertLocate("aa", "aAads", 2, "UNICODE_CI", 2);
- assertLocate("aa", "aaAds", 3, "UNICODE_CI", 0);
- assertLocate("abC", "abcabc", 1, "UNICODE_CI", 1);
- assertLocate("abC", "abCabc", 2, "UNICODE_CI", 4);
- assertLocate("abc", "abcabc", 4, "UNICODE_CI", 4);
- assertLocate("界x", "test大千世界X大千世界", 1, "UNICODE_CI", 8);
- assertLocate("界", "test大千世界X大千世界", 13, "UNICODE_CI", 13);
- assertLocate("大千", "test大千世界大千世界", 1, "UNICODE_CI", 5);
- assertLocate("大千", "test大千世界大千世界", 9, "UNICODE_CI", 9);
- assertLocate("大千", "大千世界大千世界", 1, "UNICODE_CI", 1);
- // Case-variable character length
- assertLocate("\u0307", "i̇", 1, "UTF8_BINARY", 2);
- assertLocate("\u0307", "İ", 1, "UTF8_LCASE", 0); // != UTF8_BINARY
- assertLocate("i", "i̇", 1, "UNICODE_CI", 0);
- assertLocate("\u0307", "i̇", 1, "UNICODE_CI", 0);
- assertLocate("i̇", "i", 1, "UNICODE_CI", 0);
- assertLocate("İ", "i̇", 1, "UNICODE_CI", 1);
- assertLocate("İ", "i", 1, "UNICODE_CI", 0);
- assertLocate("i", "i̇", 1, "UTF8_LCASE", 1); // != UNICODE_CI
- assertLocate("\u0307", "i̇", 1, "UTF8_LCASE", 2); // != UNICODE_CI
- assertLocate("i̇", "i", 1, "UTF8_LCASE", 0);
- assertLocate("İ", "i̇", 1, "UTF8_LCASE", 1);
- assertLocate("İ", "i", 1, "UTF8_LCASE", 0);
- assertLocate("i̇o", "İo世界大千世界", 1, "UNICODE_CI", 1);
- assertLocate("i̇o", "大千İo世界大千世界", 1, "UNICODE_CI", 3);
- assertLocate("i̇o", "世界İo大千世界大千İo", 4, "UNICODE_CI", 11);
- assertLocate("İo", "i̇o世界大千世界", 1, "UNICODE_CI", 1);
- assertLocate("İo", "大千i̇o世界大千世界", 1, "UNICODE_CI", 3);
- assertLocate("İo", "世界i̇o大千世界大千i̇o", 4, "UNICODE_CI", 12);
+ public void testStringLocate() throws SparkException {
+ // Empty strings.
+ assertStringLocate("", "", -1, "UTF8_BINARY", 1);
+ assertStringLocate("", "", -1, "UTF8_LCASE", 1);
+ assertStringLocate("", "", -1, "UNICODE", 1);
+ assertStringLocate("", "", -1, "UNICODE_CI", 1);
+ assertStringLocate("", "", 0, "UTF8_BINARY", 1);
+ assertStringLocate("", "", 0, "UTF8_LCASE", 1);
+ assertStringLocate("", "", 0, "UNICODE", 1);
+ assertStringLocate("", "", 0, "UNICODE_CI", 1);
+ assertStringLocate("", "", 1, "UTF8_BINARY", 1);
+ assertStringLocate("", "", 1, "UTF8_LCASE", 1);
+ assertStringLocate("", "", 1, "UNICODE", 1);
+ assertStringLocate("", "", 1, "UNICODE_CI", 1);
+ assertStringLocate("a", "", -1, "UTF8_BINARY", 0);
+ assertStringLocate("a", "", -1, "UTF8_LCASE", 0);
+ assertStringLocate("a", "", -1, "UNICODE", 0);
+ assertStringLocate("a", "", -1, "UNICODE_CI", 0);
+ assertStringLocate("a", "", 0, "UTF8_BINARY", 0);
+ assertStringLocate("a", "", 0, "UTF8_LCASE", 0);
+ assertStringLocate("a", "", 0, "UNICODE", 0);
+ assertStringLocate("a", "", 0, "UNICODE_CI", 0);
+ assertStringLocate("a", "", 1, "UTF8_BINARY", 0);
+ assertStringLocate("a", "", 1, "UTF8_LCASE", 0);
+ assertStringLocate("a", "", 1, "UNICODE", 0);
+ assertStringLocate("a", "", 1, "UNICODE_CI", 0);
+ assertStringLocate("", "x", -1, "UTF8_BINARY", 1);
+ assertStringLocate("", "x", -1, "UTF8_LCASE", 1);
+ assertStringLocate("", "x", -1, "UNICODE", 1);
+ assertStringLocate("", "x", -1, "UNICODE_CI", 1);
+ assertStringLocate("", "x", 0, "UTF8_BINARY", 1);
+ assertStringLocate("", "x", 0, "UTF8_LCASE", 1);
+ assertStringLocate("", "x", 0, "UNICODE", 1);
+ assertStringLocate("", "x", 0, "UNICODE_CI", 1);
+ assertStringLocate("", "x", 1, "UTF8_BINARY", 1);
+ assertStringLocate("", "x", 1, "UTF8_LCASE", 1);
+ assertStringLocate("", "x", 1, "UNICODE", 1);
+ assertStringLocate("", "x", 1, "UNICODE_CI", 1);
+ // Basic tests.
+ assertStringLocate("aa", "aaads", 1, "UTF8_BINARY", 1);
+ assertStringLocate("aa", "aaads", 1, "UTF8_LCASE", 1);
+ assertStringLocate("aa", "aaads", 1, "UNICODE", 1);
+ assertStringLocate("aa", "aaads", 1, "UNICODE_CI", 1);
+ assertStringLocate("aa", "aaads", 2, "UTF8_BINARY", 2);
+ assertStringLocate("aa", "aaads", 2, "UTF8_LCASE", 2);
+ assertStringLocate("aa", "aaads", 2, "UNICODE", 2);
+ assertStringLocate("aa", "aaads", 2, "UNICODE_CI", 2);
+ assertStringLocate("aa", "aaads", 3, "UTF8_BINARY", 0);
+ assertStringLocate("aa", "aaads", 3, "UTF8_LCASE", 0);
+ assertStringLocate("aa", "aaads", 3, "UNICODE", 0);
+ assertStringLocate("aa", "aaads", 3, "UNICODE_CI", 0);
+ assertStringLocate("Aa", "aaads", 1, "UTF8_BINARY", 0);
+ assertStringLocate("Aa", "aaads", 1, "UTF8_LCASE", 1);
+ assertStringLocate("Aa", "aaads", 1, "UNICODE", 0);
+ assertStringLocate("Aa", "aaads", 1, "UNICODE_CI", 1);
+ assertStringLocate("Aa", "aaads", 2, "UTF8_BINARY", 0);
+ assertStringLocate("Aa", "aaads", 2, "UTF8_LCASE", 2);
+ assertStringLocate("Aa", "aaads", 2, "UNICODE", 0);
+ assertStringLocate("Aa", "aaads", 2, "UNICODE_CI", 2);
+ assertStringLocate("Aa", "aaads", 3, "UTF8_BINARY", 0);
+ assertStringLocate("Aa", "aaads", 3, "UTF8_LCASE", 0);
+ assertStringLocate("Aa", "aaads", 3, "UNICODE", 0);
+ assertStringLocate("Aa", "aaads", 3, "UNICODE_CI", 0);
+ assertStringLocate("Aa", "aAads", 1, "UTF8_BINARY", 2);
+ assertStringLocate("Aa", "aAads", 1, "UTF8_LCASE", 1);
+ assertStringLocate("Aa", "aAads", 1, "UNICODE", 2);
+ assertStringLocate("Aa", "aAads", 1, "UNICODE_CI", 1);
+ assertStringLocate("AA", "aaads", 1, "UTF8_BINARY", 0);
+ assertStringLocate("AA", "aaads", 1, "UTF8_LCASE", 1);
+ assertStringLocate("AA", "aaads", 1, "UNICODE", 0);
+ assertStringLocate("AA", "aaads", 1, "UNICODE_CI", 1);
+ assertStringLocate("aa", "aAads", 2, "UTF8_BINARY", 0);
+ assertStringLocate("aa", "aAads", 2, "UTF8_LCASE", 2);
+ assertStringLocate("aa", "aAads", 2, "UNICODE", 0);
+ assertStringLocate("aa", "aAads", 2, "UNICODE_CI", 2);
+ assertStringLocate("aa", "aaAds", 3, "UTF8_BINARY", 0);
+ assertStringLocate("aa", "aaAds", 3, "UTF8_LCASE", 0);
+ assertStringLocate("aa", "aaAds", 3, "UNICODE", 0);
+ assertStringLocate("aa", "aaAds", 3, "UNICODE_CI", 0);
+ assertStringLocate("abC", "abcabc", 1, "UTF8_BINARY", 0);
+ assertStringLocate("abC", "abcabc", 1, "UTF8_LCASE", 1);
+ assertStringLocate("abC", "abcabc", 1, "UNICODE", 0);
+ assertStringLocate("abC", "abcabc", 1, "UNICODE_CI", 1);
+ assertStringLocate("abC", "abCabc", 2, "UTF8_BINARY", 0);
+ assertStringLocate("abC", "abCabc", 2, "UTF8_LCASE", 4);
+ assertStringLocate("abC", "abCabc", 2, "UNICODE", 0);
+ assertStringLocate("abC", "abCabc", 2, "UNICODE_CI", 4);
+ assertStringLocate("abc", "abcabc", 1, "UTF8_BINARY", 1);
+ assertStringLocate("abc", "abcabc", 1, "UTF8_LCASE", 1);
+ assertStringLocate("abc", "abcabc", 1, "UNICODE", 1);
+ assertStringLocate("abc", "abcabc", 1, "UNICODE_CI", 1);
+ assertStringLocate("abc", "abcabc", 2, "UTF8_BINARY", 4);
+ assertStringLocate("abc", "abcabc", 2, "UTF8_LCASE", 4);
+ assertStringLocate("abc", "abcabc", 2, "UNICODE", 4);
+ assertStringLocate("abc", "abcabc", 2, "UNICODE_CI", 4);
+ assertStringLocate("abc", "abcabc", 3, "UTF8_BINARY", 4);
+ assertStringLocate("abc", "abcabc", 3, "UTF8_LCASE", 4);
+ assertStringLocate("abc", "abcabc", 3, "UNICODE", 4);
+ assertStringLocate("abc", "abcabc", 3, "UNICODE_CI", 4);
+ assertStringLocate("abc", "abcabc", 4, "UTF8_BINARY", 4);
+ assertStringLocate("abc", "abcabc", 4, "UTF8_LCASE", 4);
+ assertStringLocate("abc", "abcabc", 4, "UNICODE", 4);
+ assertStringLocate("abc", "abcabc", 4, "UNICODE_CI", 4);
+ assertStringLocate("aa", "Aaads", 1, "UTF8_BINARY", 2);
+ assertStringLocate("aa", "Aaads", 1, "UTF8_LCASE", 1);
+ assertStringLocate("aa", "Aaads", 1, "UNICODE", 2);
+ assertStringLocate("aa", "Aaads", 1, "UNICODE_CI", 1);
+ assertStringLocate("ćČ", "CćČČćCČĆČcČcććČč", 3, "SR", 14);
+ assertStringLocate("ćČ", "CćČČćCČĆČcČcććČč", 3, "SR_CI_AI", 3);
+ // Advanced tests.
+ assertStringLocate("界x", "test大千世界X大千世界", 1, "UTF8_BINARY", 0);
+ assertStringLocate("界X", "test大千世界X大千世界", 1, "UTF8_BINARY", 8);
+ assertStringLocate("界", "test大千世界X大千世界", 13, "UTF8_BINARY", 13);
+ assertStringLocate("界x", "test大千世界X大千世界", 1, "UTF8_LCASE", 8);
+ assertStringLocate("界X", "test大千世界Xtest大千世界", 1, "UTF8_LCASE", 8);
+ assertStringLocate("界", "test大千世界X大千世界", 13, "UTF8_LCASE", 13);
+ assertStringLocate("大千", "test大千世界大千世界", 1, "UTF8_LCASE", 5);
+ assertStringLocate("大千", "test大千世界大千世界", 9, "UTF8_LCASE", 9);
+ assertStringLocate("大千", "大千世界大千世界", 1, "UTF8_LCASE", 1);
+ assertStringLocate("界x", "test大千世界X大千世界", 1, "UNICODE", 0);
+ assertStringLocate("界X", "test大千世界X大千世界", 1, "UNICODE", 8);
+ assertStringLocate("界", "test大千世界X大千世界", 13, "UNICODE", 13);
+ assertStringLocate("界x", "test大千世界X大千世界", 1, "UNICODE_CI", 8);
+ assertStringLocate("界", "test大千世界X大千世界", 13, "UNICODE_CI", 13);
+ assertStringLocate("大千", "test大千世界大千世界", 1, "UNICODE_CI", 5);
+ assertStringLocate("大千", "test大千世界大千世界", 9, "UNICODE_CI", 9);
+ assertStringLocate("大千", "大千世界大千世界", 1, "UNICODE_CI", 1);
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertStringLocate("\u0307", "i\u0307", 1, "UTF8_BINARY", 2);
+ assertStringLocate("\u0307", "İ", 1, "UTF8_LCASE", 0); // != UTF8_BINARY
+ assertStringLocate("i", "i\u0307", 1, "UNICODE_CI", 0);
+ assertStringLocate("\u0307", "i\u0307", 1, "UNICODE_CI", 0);
+ assertStringLocate("i\u0307", "i", 1, "UNICODE_CI", 0);
+ assertStringLocate("İ", "i\u0307", 1, "UNICODE_CI", 1);
+ assertStringLocate("İ", "i", 1, "UNICODE_CI", 0);
+ assertStringLocate("i", "i\u0307", 1, "UTF8_LCASE", 1); // != UNICODE_CI
+ assertStringLocate("\u0307", "i\u0307", 1, "UTF8_LCASE", 2); // != UNICODE_CI
+ assertStringLocate("i\u0307", "i", 1, "UTF8_LCASE", 0);
+ assertStringLocate("İ", "i\u0307", 1, "UTF8_LCASE", 1);
+ assertStringLocate("İ", "i", 1, "UTF8_LCASE", 0);
+ assertStringLocate("i\u0307o", "İo世界大千世界", 1, "UNICODE_CI", 1);
+ assertStringLocate("i\u0307o", "大千İo世界大千世界", 1, "UNICODE_CI", 3);
+ assertStringLocate("i\u0307o", "世界İo大千世界大千İo", 4, "UNICODE_CI", 11);
+ assertStringLocate("İo", "i̇o世界大千世界", 1, "UNICODE_CI", 1);
+ assertStringLocate("İo", "大千i̇o世界大千世界", 1, "UNICODE_CI", 3);
+ assertStringLocate("İo", "世界i̇o大千世界大千i̇o", 4, "UNICODE_CI", 12);
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertStringLocate("σ", "σ", 1, "UTF8_BINARY", 1);
+ assertStringLocate("σ", "ς", 1, "UTF8_BINARY", 0);
+ assertStringLocate("σ", "Σ", 1, "UTF8_BINARY", 0);
+ assertStringLocate("ς", "σ", 1, "UTF8_BINARY", 0);
+ assertStringLocate("ς", "ς", 1, "UTF8_BINARY", 1);
+ assertStringLocate("ς", "Σ", 1, "UTF8_BINARY", 0);
+ assertStringLocate("Σ", "σ", 1, "UTF8_BINARY", 0);
+ assertStringLocate("Σ", "ς", 1, "UTF8_BINARY", 0);
+ assertStringLocate("Σ", "Σ", 1, "UTF8_BINARY", 1);
+ assertStringLocate("σ", "σ", 1, "UTF8_LCASE", 1);
+ assertStringLocate("σ", "ς", 1, "UTF8_LCASE", 1);
+ assertStringLocate("σ", "Σ", 1, "UTF8_LCASE", 1);
+ assertStringLocate("ς", "σ", 1, "UTF8_LCASE", 1);
+ assertStringLocate("ς", "ς", 1, "UTF8_LCASE", 1);
+ assertStringLocate("ς", "Σ", 1, "UTF8_LCASE", 1);
+ assertStringLocate("Σ", "σ", 1, "UTF8_LCASE", 1);
+ assertStringLocate("Σ", "ς", 1, "UTF8_LCASE", 1);
+ assertStringLocate("Σ", "Σ", 1, "UTF8_LCASE", 1);
+ assertStringLocate("σ", "σ", 1, "UNICODE", 1);
+ assertStringLocate("σ", "ς", 1, "UNICODE", 0);
+ assertStringLocate("σ", "Σ", 1, "UNICODE", 0);
+ assertStringLocate("ς", "σ", 1, "UNICODE", 0);
+ assertStringLocate("ς", "ς", 1, "UNICODE", 1);
+ assertStringLocate("ς", "Σ", 1, "UNICODE", 0);
+ assertStringLocate("Σ", "σ", 1, "UNICODE", 0);
+ assertStringLocate("Σ", "ς", 1, "UNICODE", 0);
+ assertStringLocate("Σ", "Σ", 1, "UNICODE", 1);
+ assertStringLocate("σ", "σ", 1, "UNICODE_CI", 1);
+ assertStringLocate("σ", "ς", 1, "UNICODE_CI", 1);
+ assertStringLocate("σ", "Σ", 1, "UNICODE_CI", 1);
+ assertStringLocate("ς", "σ", 1, "UNICODE_CI", 1);
+ assertStringLocate("ς", "ς", 1, "UNICODE_CI", 1);
+ assertStringLocate("ς", "Σ", 1, "UNICODE_CI", 1);
+ assertStringLocate("Σ", "σ", 1, "UNICODE_CI", 1);
+ assertStringLocate("Σ", "ς", 1, "UNICODE_CI", 1);
+ assertStringLocate("Σ", "Σ", 1, "UNICODE_CI", 1);
+ // Surrogate pairs.
+ assertStringLocate("a", "a🙃b", 1, "UTF8_BINARY", 1);
+ assertStringLocate("a", "a🙃b", 1, "UTF8_LCASE", 1);
+ assertStringLocate("a", "a🙃b", 1, "UNICODE", 1);
+ assertStringLocate("a", "a🙃b", 1, "UNICODE_CI", 1);
+ assertStringLocate("a", "a🙃b", 2, "UTF8_BINARY", 0);
+ assertStringLocate("a", "a🙃b", 2, "UTF8_LCASE", 0);
+ assertStringLocate("a", "a🙃b", 2, "UNICODE", 0);
+ assertStringLocate("a", "a🙃b", 2, "UNICODE_CI", 0);
+ assertStringLocate("a", "a🙃b", 3, "UTF8_BINARY", 0);
+ assertStringLocate("a", "a🙃b", 3, "UTF8_LCASE", 0);
+ assertStringLocate("a", "a🙃b", 3, "UNICODE", 0);
+ assertStringLocate("a", "a🙃b", 3, "UNICODE_CI", 0);
+ assertStringLocate("🙃", "a🙃b", 1, "UTF8_BINARY", 2);
+ assertStringLocate("🙃", "a🙃b", 1, "UTF8_LCASE", 2);
+ assertStringLocate("🙃", "a🙃b", 1, "UNICODE", 2);
+ assertStringLocate("🙃", "a🙃b", 1, "UNICODE_CI", 2);
+ assertStringLocate("🙃", "a🙃b", 2, "UTF8_BINARY", 2);
+ assertStringLocate("🙃", "a🙃b", 2, "UTF8_LCASE", 2);
+ assertStringLocate("🙃", "a🙃b", 2, "UNICODE", 2);
+ assertStringLocate("🙃", "a🙃b", 2, "UNICODE_CI", 2);
+ assertStringLocate("🙃", "a🙃b", 3, "UTF8_BINARY", 0);
+ assertStringLocate("🙃", "a🙃b", 3, "UTF8_LCASE", 0);
+ assertStringLocate("🙃", "a🙃b", 3, "UNICODE", 0);
+ assertStringLocate("🙃", "a🙃b", 3, "UNICODE_CI", 0);
+ assertStringLocate("b", "a🙃b", 1, "UTF8_BINARY", 3);
+ assertStringLocate("b", "a🙃b", 1, "UTF8_LCASE", 3);
+ assertStringLocate("b", "a🙃b", 1, "UNICODE", 3);
+ assertStringLocate("b", "a🙃b", 1, "UNICODE_CI", 3);
+ assertStringLocate("b", "a🙃b", 2, "UTF8_BINARY", 3);
+ assertStringLocate("b", "a🙃b", 2, "UTF8_LCASE", 3);
+ assertStringLocate("b", "a🙃b", 2, "UNICODE", 3);
+ assertStringLocate("b", "a🙃b", 2, "UNICODE_CI", 3);
+ assertStringLocate("b", "a🙃b", 3, "UTF8_BINARY", 3);
+ assertStringLocate("b", "a🙃b", 3, "UTF8_LCASE", 3);
+ assertStringLocate("b", "a🙃b", 3, "UNICODE", 3);
+ assertStringLocate("b", "a🙃b", 3, "UNICODE_CI", 3);
+ assertStringLocate("🙃", "a🙃🙃b", 1, "UTF8_BINARY", 2);
+ assertStringLocate("🙃", "a🙃🙃b", 1, "UTF8_LCASE", 2);
+ assertStringLocate("🙃", "a🙃🙃b", 1, "UNICODE", 2);
+ assertStringLocate("🙃", "a🙃🙃b", 1, "UNICODE_CI", 2);
+ assertStringLocate("🙃", "a🙃🙃b", 2, "UTF8_BINARY", 2);
+ assertStringLocate("🙃", "a🙃🙃b", 2, "UTF8_LCASE", 2);
+ assertStringLocate("🙃", "a🙃🙃b", 2, "UNICODE", 2);
+ assertStringLocate("🙃", "a🙃🙃b", 2, "UNICODE_CI", 2);
+ assertStringLocate("🙃", "a🙃🙃b", 3, "UTF8_BINARY", 3);
+ assertStringLocate("🙃", "a🙃🙃b", 3, "UTF8_LCASE", 3);
+ assertStringLocate("🙃", "a🙃🙃b", 3, "UNICODE", 3);
+ assertStringLocate("🙃", "a🙃🙃b", 3, "UNICODE_CI", 3);
+ assertStringLocate("🙃", "a🙃🙃b", 4, "UTF8_BINARY", 0);
+ assertStringLocate("🙃", "a🙃🙃b", 4, "UTF8_LCASE", 0);
+ assertStringLocate("🙃", "a🙃🙃b", 4, "UNICODE", 0);
+ assertStringLocate("🙃", "a🙃🙃b", 4, "UNICODE_CI", 0);
+ assertStringLocate("b", "a🙃🙃b", 1, "UTF8_BINARY", 4);
+ assertStringLocate("b", "a🙃🙃b", 1, "UTF8_LCASE", 4);
+ assertStringLocate("b", "a🙃🙃b", 1, "UNICODE", 4);
+ assertStringLocate("b", "a🙃🙃b", 1, "UNICODE_CI", 4);
+ assertStringLocate("b", "a🙃🙃b", 2, "UTF8_BINARY", 4);
+ assertStringLocate("b", "a🙃🙃b", 2, "UTF8_LCASE", 4);
+ assertStringLocate("b", "a🙃🙃b", 2, "UNICODE", 4);
+ assertStringLocate("b", "a🙃🙃b", 2, "UNICODE_CI", 4);
+ assertStringLocate("b", "a🙃🙃b", 3, "UTF8_BINARY", 4);
+ assertStringLocate("b", "a🙃🙃b", 3, "UTF8_LCASE", 4);
+ assertStringLocate("b", "a🙃🙃b", 3, "UNICODE", 4);
+ assertStringLocate("b", "a🙃🙃b", 3, "UNICODE_CI", 4);
+ assertStringLocate("b", "a🙃🙃b", 4, "UTF8_BINARY", 4);
+ assertStringLocate("b", "a🙃🙃b", 4, "UTF8_LCASE", 4);
+ assertStringLocate("b", "a🙃🙃b", 4, "UNICODE", 4);
+ assertStringLocate("b", "a🙃🙃b", 4, "UNICODE_CI", 4);
+ assertStringLocate("b", "a🙃x🙃b", 1, "UTF8_BINARY", 5);
+ assertStringLocate("b", "a🙃x🙃b", 1, "UTF8_LCASE", 5);
+ assertStringLocate("b", "a🙃x🙃b", 1, "UNICODE", 5);
+ assertStringLocate("b", "a🙃x🙃b", 1, "UNICODE_CI", 5);
+ assertStringLocate("b", "a🙃x🙃b", 2, "UTF8_BINARY", 5);
+ assertStringLocate("b", "a🙃x🙃b", 2, "UTF8_LCASE", 5);
+ assertStringLocate("b", "a🙃x🙃b", 2, "UNICODE", 5);
+ assertStringLocate("b", "a🙃x🙃b", 2, "UNICODE_CI", 5);
+ assertStringLocate("b", "a🙃x🙃b", 3, "UTF8_BINARY", 5);
+ assertStringLocate("b", "a🙃x🙃b", 3, "UTF8_LCASE", 5);
+ assertStringLocate("b", "a🙃x🙃b", 3, "UNICODE", 5);
+ assertStringLocate("b", "a🙃x🙃b", 3, "UNICODE_CI", 5);
+ assertStringLocate("b", "a🙃x🙃b", 4, "UTF8_BINARY", 5);
+ assertStringLocate("b", "a🙃x🙃b", 4, "UTF8_LCASE", 5);
+ assertStringLocate("b", "a🙃x🙃b", 4, "UNICODE", 5);
+ assertStringLocate("b", "a🙃x🙃b", 4, "UNICODE_CI", 5);
+ // Out of bounds test cases.
+ assertStringLocate("a", "asd", 4, "UTF8_BINARY", 0);
+ assertStringLocate("a", "asd", 4, "UTF8_LCASE", 0);
+ assertStringLocate("a", "asd", 4, "UNICODE", 0);
+ assertStringLocate("a", "asd", 4, "UNICODE_CI", 0);
+ assertStringLocate("a", "asd", 100, "UTF8_BINARY", 0);
+ assertStringLocate("a", "asd", 100, "UTF8_LCASE", 0);
+ assertStringLocate("a", "asd", 100, "UNICODE", 0);
+ assertStringLocate("a", "asd", 100, "UNICODE_CI", 0);
+ assertStringLocate("a", "🙃🙃", 4, "UTF8_BINARY", 0);
+ assertStringLocate("a", "🙃🙃", 4, "UTF8_LCASE", 0);
+ assertStringLocate("a", "🙃🙃", 4, "UNICODE", 0);
+ assertStringLocate("a", "🙃🙃", 4, "UNICODE_CI", 0);
+ assertStringLocate("", "asd", 100, "UTF8_BINARY", 1);
+ assertStringLocate("", "asd", 100, "UTF8_LCASE", 1);
+ assertStringLocate("", "asd", 100, "UNICODE", 1);
+ assertStringLocate("", "asd", 100, "UNICODE_CI", 1);
+ assertStringLocate("asd", "", 100, "UTF8_BINARY", 0);
+ assertStringLocate("asd", "", 100, "UTF8_LCASE", 0);
+ assertStringLocate("asd", "", 100, "UNICODE", 0);
+ assertStringLocate("asd", "", 100, "UNICODE_CI", 0);
}
- private void assertSubstringIndex(String string, String delimiter, Integer count,
- String collationName, String expected) throws SparkException {
+ /**
+ * Verify the behaviour of the `SubstringIndex` collation support class.
+ */
+
+ private void assertSubstringIndex(String string, String delimiter, int count,
+ String collationName, String expected) throws SparkException {
UTF8String str = UTF8String.fromString(string);
UTF8String delim = UTF8String.fromString(delimiter);
int collationId = CollationFactory.collationNameToId(collationName);
- assertEquals(expected,
- CollationSupport.SubstringIndex.exec(str, delim, count, collationId).toString());
+ UTF8String result = CollationSupport.SubstringIndex.exec(str, delim, count, collationId);
+ assertEquals(UTF8String.fromString(expected), result);
}
@Test
public void testSubstringIndex() throws SparkException {
+ // Empty strings.
+ assertSubstringIndex("", "", 0, "UTF8_BINARY", "");
+ assertSubstringIndex("", "", 0, "UTF8_LCASE", "");
+ assertSubstringIndex("", "", 0, "UNICODE", "");
+ assertSubstringIndex("", "", 0, "UNICODE_CI", "");
+ assertSubstringIndex("", "", 1, "UTF8_BINARY", "");
+ assertSubstringIndex("", "", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("", "", 1, "UNICODE", "");
+ assertSubstringIndex("", "", 1, "UNICODE_CI", "");
+ assertSubstringIndex("", "", -1, "UTF8_BINARY", "");
+ assertSubstringIndex("", "", -1, "UTF8_LCASE", "");
+ assertSubstringIndex("", "", -1, "UNICODE", "");
+ assertSubstringIndex("", "", -1, "UNICODE_CI", "");
+ assertSubstringIndex("", "x", 0, "UTF8_BINARY", "");
+ assertSubstringIndex("", "x", 0, "UTF8_LCASE", "");
+ assertSubstringIndex("", "x", 0, "UNICODE", "");
+ assertSubstringIndex("", "x", 0, "UNICODE_CI", "");
+ assertSubstringIndex("", "x", 1, "UTF8_BINARY", "");
+ assertSubstringIndex("", "x", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("", "x", 1, "UNICODE", "");
+ assertSubstringIndex("", "x", 1, "UNICODE_CI", "");
+ assertSubstringIndex("", "x", -1, "UTF8_BINARY", "");
+ assertSubstringIndex("", "x", -1, "UTF8_LCASE", "");
+ assertSubstringIndex("", "x", -1, "UNICODE", "");
+ assertSubstringIndex("", "x", -1, "UNICODE_CI", "");
+ assertSubstringIndex("abc", "", 0, "UTF8_BINARY", "");
+ assertSubstringIndex("abc", "", 0, "UTF8_LCASE", "");
+ assertSubstringIndex("abc", "", 0, "UNICODE", "");
+ assertSubstringIndex("abc", "", 0, "UNICODE_CI", "");
+ assertSubstringIndex("abc", "", 1, "UTF8_BINARY", "");
+ assertSubstringIndex("abc", "", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("abc", "", 1, "UNICODE", "");
+ assertSubstringIndex("abc", "", 1, "UNICODE_CI", "");
+ assertSubstringIndex("abc", "", -1, "UTF8_BINARY", "");
+ assertSubstringIndex("abc", "", -1, "UTF8_LCASE", "");
+ assertSubstringIndex("abc", "", -1, "UNICODE", "");
+ assertSubstringIndex("abc", "", -1, "UNICODE_CI", "");
+ // Basic tests.
+ assertSubstringIndex("axbxc", "a", 1, "UTF8_BINARY", "");
+ assertSubstringIndex("axbxc", "a", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("axbxc", "a", 1, "UNICODE", "");
+ assertSubstringIndex("axbxc", "a", 1, "UNICODE_CI", "");
+ assertSubstringIndex("axbxc", "x", 1, "UTF8_BINARY", "a");
+ assertSubstringIndex("axbxc", "x", 1, "UTF8_LCASE", "a");
+ assertSubstringIndex("axbxc", "x", 1, "UNICODE", "a");
+ assertSubstringIndex("axbxc", "x", 1, "UNICODE_CI", "a");
+ assertSubstringIndex("axbxc", "b", 1, "UTF8_BINARY", "ax");
+ assertSubstringIndex("axbxc", "b", 1, "UTF8_LCASE", "ax");
+ assertSubstringIndex("axbxc", "b", 1, "UNICODE", "ax");
+ assertSubstringIndex("axbxc", "b", 1, "UNICODE_CI", "ax");
+ assertSubstringIndex("axbxc", "x", 2, "UTF8_BINARY", "axb");
+ assertSubstringIndex("axbxc", "x", 2, "UTF8_LCASE", "axb");
+ assertSubstringIndex("axbxc", "x", 2, "UNICODE", "axb");
+ assertSubstringIndex("axbxc", "x", 2, "UNICODE_CI", "axb");
+ assertSubstringIndex("axbxc", "c", 1, "UTF8_BINARY", "axbx");
+ assertSubstringIndex("axbxc", "c", 1, "UTF8_LCASE", "axbx");
+ assertSubstringIndex("axbxc", "c", 1, "UNICODE", "axbx");
+ assertSubstringIndex("axbxc", "c", 1, "UNICODE_CI", "axbx");
+ assertSubstringIndex("axbxc", "x", 3, "UTF8_BINARY", "axbxc");
+ assertSubstringIndex("axbxc", "x", 3, "UTF8_LCASE", "axbxc");
+ assertSubstringIndex("axbxc", "x", 3, "UNICODE", "axbxc");
+ assertSubstringIndex("axbxc", "x", 3, "UNICODE_CI", "axbxc");
+ assertSubstringIndex("axbxc", "d", 1, "UTF8_BINARY", "axbxc");
+ assertSubstringIndex("axbxc", "d", 1, "UTF8_LCASE", "axbxc");
+ assertSubstringIndex("axbxc", "d", 1, "UNICODE", "axbxc");
+ assertSubstringIndex("axbxc", "d", 1, "UNICODE_CI", "axbxc");
+ assertSubstringIndex("axbxc", "c", -1, "UTF8_BINARY", "");
+ assertSubstringIndex("axbxc", "c", -1, "UTF8_LCASE", "");
+ assertSubstringIndex("axbxc", "c", -1, "UNICODE", "");
+ assertSubstringIndex("axbxc", "c", -1, "UNICODE_CI", "");
+ assertSubstringIndex("axbxc", "x", -1, "UTF8_BINARY", "c");
+ assertSubstringIndex("axbxc", "x", -1, "UTF8_LCASE", "c");
+ assertSubstringIndex("axbxc", "x", -1, "UNICODE", "c");
+ assertSubstringIndex("axbxc", "x", -1, "UNICODE_CI", "c");
+ assertSubstringIndex("axbxc", "b", -1, "UTF8_BINARY", "xc");
+ assertSubstringIndex("axbxc", "b", -1, "UTF8_LCASE", "xc");
+ assertSubstringIndex("axbxc", "b", -1, "UNICODE", "xc");
+ assertSubstringIndex("axbxc", "b", -1, "UNICODE_CI", "xc");
+ assertSubstringIndex("axbxc", "x", -2, "UTF8_BINARY", "bxc");
+ assertSubstringIndex("axbxc", "x", -2, "UTF8_LCASE", "bxc");
+ assertSubstringIndex("axbxc", "x", -2, "UNICODE", "bxc");
+ assertSubstringIndex("axbxc", "x", -2, "UNICODE_CI", "bxc");
+ assertSubstringIndex("axbxc", "a", -1, "UTF8_BINARY", "xbxc");
+ assertSubstringIndex("axbxc", "a", -1, "UTF8_LCASE", "xbxc");
+ assertSubstringIndex("axbxc", "a", -1, "UNICODE", "xbxc");
+ assertSubstringIndex("axbxc", "a", -1, "UNICODE_CI", "xbxc");
+ assertSubstringIndex("axbxc", "x", -3, "UTF8_BINARY", "axbxc");
+ assertSubstringIndex("axbxc", "x", -3, "UTF8_LCASE", "axbxc");
+ assertSubstringIndex("axbxc", "x", -3, "UNICODE", "axbxc");
+ assertSubstringIndex("axbxc", "x", -3, "UNICODE_CI", "axbxc");
+ assertSubstringIndex("axbxc", "d", -1, "UTF8_BINARY", "axbxc");
+ assertSubstringIndex("axbxc", "d", -1, "UTF8_LCASE", "axbxc");
+ assertSubstringIndex("axbxc", "d", -1, "UNICODE", "axbxc");
+ assertSubstringIndex("axbxc", "d", -1, "UNICODE_CI", "axbxc");
+ // Advanced tests.
assertSubstringIndex("wwwgapachegorg", "g", -3, "UTF8_BINARY", "apachegorg");
assertSubstringIndex("www||apache||org", "||", 2, "UTF8_BINARY", "www||apache");
assertSubstringIndex("aaaaaaaaaa", "aa", 2, "UTF8_BINARY", "a");
@@ -1182,8 +2602,10 @@ public void testSubstringIndex() throws SparkException {
assertSubstringIndex("test大千世界X大千世界", "X", 1, "UNICODE_CI", "test大千世界");
assertSubstringIndex("test大千世界大千世界", "千", 2, "UNICODE_CI", "test大千世界大");
assertSubstringIndex("www||APACHE||org", "||", 2, "UNICODE_CI", "www||APACHE");
- assertSubstringIndex("abİo12", "i̇o", 1, "UNICODE_CI", "ab");
- assertSubstringIndex("abİo12", "i̇o", -1, "UNICODE_CI", "12");
+ assertSubstringIndex("wwwèapacheËorg", "Ê", -3, "AF_CI_AI", "apacheËorg");
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertSubstringIndex("abİo12", "i\u0307o", 1, "UNICODE_CI", "ab");
+ assertSubstringIndex("abİo12", "i\u0307o", -1, "UNICODE_CI", "12");
assertSubstringIndex("abi̇o12", "İo", 1, "UNICODE_CI", "ab");
assertSubstringIndex("abi̇o12", "İo", -1, "UNICODE_CI", "12");
assertSubstringIndex("ai̇bi̇o12", "İo", 1, "UNICODE_CI", "ai̇b");
@@ -1191,59 +2613,153 @@ public void testSubstringIndex() throws SparkException {
assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -1, "UNICODE_CI", "");
assertSubstringIndex("ai̇bi̇o12i̇o", "İo", -2, "UNICODE_CI", "12i̇o");
assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UNICODE_CI", "İo12İoi̇o");
- assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", -4, "UNICODE_CI", "İo12İoi̇o");
+ assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UNICODE_CI", "İo12İoi̇o");
assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UNICODE_CI", "i̇o12i̇oİo");
- assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", -4, "UNICODE_CI", "i̇o12i̇oİo");
+ assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UNICODE_CI", "i̇o12i̇oİo");
assertSubstringIndex("abi̇12", "i", 1, "UNICODE_CI", "abi̇12");
assertSubstringIndex("abi̇12", "\u0307", 1, "UNICODE_CI", "abi̇12");
assertSubstringIndex("abi̇12", "İ", 1, "UNICODE_CI", "ab");
assertSubstringIndex("abİ12", "i", 1, "UNICODE_CI", "abİ12");
assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UNICODE_CI", "İo12İoi̇o");
- assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", -4, "UNICODE_CI", "İo12İoi̇o");
+ assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UNICODE_CI", "İo12İoi̇o");
assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UNICODE_CI", "i̇o12i̇oİo");
- assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", -4, "UNICODE_CI", "i̇o12i̇oİo");
+ assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UNICODE_CI", "i̇o12i̇oİo");
assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, "UNICODE_CI", "ai̇bi̇oİo12");
- assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", 3, "UNICODE_CI", "ai̇bi̇oİo12");
+ assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, "UNICODE_CI", "ai̇bi̇oİo12");
assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, "UNICODE_CI", "ai̇bİoi̇o12");
- assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", 3, "UNICODE_CI", "ai̇bİoi̇o12");
+ assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, "UNICODE_CI", "ai̇bİoi̇o12");
assertSubstringIndex("abi̇12", "i", 1, "UTF8_LCASE", "ab"); // != UNICODE_CI
assertSubstringIndex("abi̇12", "\u0307", 1, "UTF8_LCASE", "abi"); // != UNICODE_CI
assertSubstringIndex("abi̇12", "İ", 1, "UTF8_LCASE", "ab");
assertSubstringIndex("abİ12", "i", 1, "UTF8_LCASE", "abİ12");
assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", -4, "UTF8_LCASE", "İo12İoi̇o");
- assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", -4, "UTF8_LCASE", "İo12İoi̇o");
+ assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", -4, "UTF8_LCASE", "İo12İoi̇o");
assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", -4, "UTF8_LCASE", "i̇o12i̇oİo");
- assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", -4, "UTF8_LCASE", "i̇o12i̇oİo");
+ assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", -4, "UTF8_LCASE", "i̇o12i̇oİo");
assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, "UTF8_LCASE", "bİoi̇o12i̇o");
assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "İo", 3, "UTF8_LCASE", "ai̇bi̇oİo12");
- assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i̇o", 3, "UTF8_LCASE", "ai̇bi̇oİo12");
+ assertSubstringIndex("ai̇bi̇oİo12İoi̇o", "i\u0307o", 3, "UTF8_LCASE", "ai̇bi̇oİo12");
assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "İo", 3, "UTF8_LCASE", "ai̇bİoi̇o12");
- assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i̇o", 3, "UTF8_LCASE", "ai̇bİoi̇o12");
+ assertSubstringIndex("ai̇bİoi̇o12i̇oİo", "i\u0307o", 3, "UTF8_LCASE", "ai̇bİoi̇o12");
assertSubstringIndex("bİoi̇o12i̇o", "\u0307oi", 1, "UTF8_LCASE", "bİoi̇o12i̇o");
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertSubstringIndex("σ", "σ", 1, "UTF8_BINARY", "");
+ assertSubstringIndex("σ", "ς", 1, "UTF8_BINARY", "σ");
+ assertSubstringIndex("σ", "Σ", 1, "UTF8_BINARY", "σ");
+ assertSubstringIndex("ς", "σ", 1, "UTF8_BINARY", "ς");
+ assertSubstringIndex("ς", "ς", 1, "UTF8_BINARY", "");
+ assertSubstringIndex("ς", "Σ", 1, "UTF8_BINARY", "ς");
+ assertSubstringIndex("Σ", "σ", 1, "UTF8_BINARY", "Σ");
+ assertSubstringIndex("Σ", "ς", 1, "UTF8_BINARY", "Σ");
+ assertSubstringIndex("Σ", "Σ", 1, "UTF8_BINARY", "");
+ assertSubstringIndex("σ", "σ", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("σ", "ς", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("σ", "Σ", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("ς", "σ", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("ς", "ς", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("ς", "Σ", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("Σ", "σ", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("Σ", "ς", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("Σ", "Σ", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("σ", "σ", 1, "UNICODE", "");
+ assertSubstringIndex("σ", "ς", 1, "UNICODE", "σ");
+ assertSubstringIndex("σ", "Σ", 1, "UNICODE", "σ");
+ assertSubstringIndex("ς", "σ", 1, "UNICODE", "ς");
+ assertSubstringIndex("ς", "ς", 1, "UNICODE", "");
+ assertSubstringIndex("ς", "Σ", 1, "UNICODE", "ς");
+ assertSubstringIndex("Σ", "σ", 1, "UNICODE", "Σ");
+ assertSubstringIndex("Σ", "ς", 1, "UNICODE", "Σ");
+ assertSubstringIndex("Σ", "Σ", 1, "UNICODE", "");
+ assertSubstringIndex("σ", "σ", 1, "UNICODE_CI", "");
+ assertSubstringIndex("σ", "ς", 1, "UNICODE_CI", "");
+ assertSubstringIndex("σ", "Σ", 1, "UNICODE_CI", "");
+ assertSubstringIndex("ς", "σ", 1, "UNICODE_CI", "");
+ assertSubstringIndex("ς", "ς", 1, "UNICODE_CI", "");
+ assertSubstringIndex("ς", "Σ", 1, "UNICODE_CI", "");
+ assertSubstringIndex("Σ", "σ", 1, "UNICODE_CI", "");
+ assertSubstringIndex("Σ", "ς", 1, "UNICODE_CI", "");
+ assertSubstringIndex("Σ", "Σ", 1, "UNICODE_CI", "");
+ // Surrogate pairs.
+ assertSubstringIndex("a🙃b🙃c", "a", 1, "UTF8_BINARY", "");
+ assertSubstringIndex("a🙃b🙃c", "a", 1, "UTF8_LCASE", "");
+ assertSubstringIndex("a🙃b🙃c", "a", 1, "UNICODE", "");
+ assertSubstringIndex("a🙃b🙃c", "a", 1, "UNICODE_CI", "");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UTF8_BINARY", "a");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UTF8_LCASE", "a");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UNICODE", "a");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 1, "UNICODE_CI", "a");
+ assertSubstringIndex("a🙃b🙃c", "b", 1, "UTF8_BINARY", "a🙃");
+ assertSubstringIndex("a🙃b🙃c", "b", 1, "UTF8_LCASE", "a🙃");
+ assertSubstringIndex("a🙃b🙃c", "b", 1, "UNICODE", "a🙃");
+ assertSubstringIndex("a🙃b🙃c", "b", 1, "UNICODE_CI", "a🙃");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UTF8_BINARY", "a🙃b");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UTF8_LCASE", "a🙃b");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UNICODE", "a🙃b");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 2, "UNICODE_CI", "a🙃b");
+ assertSubstringIndex("a🙃b🙃c", "c", 1, "UTF8_BINARY", "a🙃b🙃");
+ assertSubstringIndex("a🙃b🙃c", "c", 1, "UTF8_LCASE", "a🙃b🙃");
+ assertSubstringIndex("a🙃b🙃c", "c", 1, "UNICODE", "a🙃b🙃");
+ assertSubstringIndex("a🙃b🙃c", "c", 1, "UNICODE_CI", "a🙃b🙃");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UTF8_BINARY", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UTF8_LCASE", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UNICODE", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", 3, "UNICODE_CI", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "d", 1, "UTF8_BINARY", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "d", 1, "UTF8_LCASE", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "d", 1, "UNICODE", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "d", 1, "UNICODE_CI", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "c", -1, "UTF8_BINARY", "");
+ assertSubstringIndex("a🙃b🙃c", "c", -1, "UTF8_LCASE", "");
+ assertSubstringIndex("a🙃b🙃c", "c", -1, "UNICODE", "");
+ assertSubstringIndex("a🙃b🙃c", "c", -1, "UNICODE_CI", "");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UTF8_BINARY", "c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UTF8_LCASE", "c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UNICODE", "c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -1, "UNICODE_CI", "c");
+ assertSubstringIndex("a🙃b🙃c", "b", -1, "UTF8_BINARY", "🙃c");
+ assertSubstringIndex("a🙃b🙃c", "b", -1, "UTF8_LCASE", "🙃c");
+ assertSubstringIndex("a🙃b🙃c", "b", -1, "UNICODE", "🙃c");
+ assertSubstringIndex("a🙃b🙃c", "b", -1, "UNICODE_CI", "🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UTF8_BINARY", "b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UTF8_LCASE", "b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UNICODE", "b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -2, "UNICODE_CI", "b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "a", -1, "UTF8_BINARY", "🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "a", -1, "UTF8_LCASE", "🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "a", -1, "UNICODE", "🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "a", -1, "UNICODE_CI", "🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UTF8_BINARY", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UTF8_LCASE", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UNICODE", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "🙃", -3, "UNICODE_CI", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "d", -1, "UTF8_BINARY", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "d", -1, "UTF8_LCASE", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "d", -1, "UNICODE", "a🙃b🙃c");
+ assertSubstringIndex("a🙃b🙃c", "d", -1, "UNICODE_CI", "a🙃b🙃c");
}
- private void assertStringTrim(
- String collation,
- String sourceString,
- String trimString,
- String expectedResultString) throws SparkException {
+ /**
+ * Verify the behaviour of the `StringTrim` collation support class.
+ */
+
+ private void assertStringTrim(String collationName, String sourceString, String trimString,
+ String expected) throws SparkException {
// Prepare the input and expected result.
- int collationId = CollationFactory.collationNameToId(collation);
+ int collationId = CollationFactory.collationNameToId(collationName);
UTF8String src = UTF8String.fromString(sourceString);
UTF8String trim = UTF8String.fromString(trimString);
- UTF8String resultTrimLeftRight, resultTrimRightLeft;
- String resultTrim;
+ UTF8String result, resultTrimLeftRight, resultTrimRightLeft;
if (trimString == null) {
// Trim string is ASCII space.
- resultTrim = CollationSupport.StringTrim.exec(src).toString();
+ result = CollationSupport.StringTrim.exec(src);
UTF8String trimLeft = CollationSupport.StringTrimLeft.exec(src);
resultTrimLeftRight = CollationSupport.StringTrimRight.exec(trimLeft);
UTF8String trimRight = CollationSupport.StringTrimRight.exec(src);
resultTrimRightLeft = CollationSupport.StringTrimLeft.exec(trimRight);
} else {
// Trim string is specified.
- resultTrim = CollationSupport.StringTrim.exec(src, trim, collationId).toString();
+ result = CollationSupport.StringTrim.exec(src, trim, collationId);
UTF8String trimLeft = CollationSupport.StringTrimLeft.exec(src, trim, collationId);
resultTrimLeftRight = CollationSupport.StringTrimRight.exec(trimLeft, trim, collationId);
UTF8String trimRight = CollationSupport.StringTrimRight.exec(src, trim, collationId);
@@ -1251,61 +2767,15 @@ private void assertStringTrim(
}
// Test that StringTrim result is as expected.
- assertEquals(expectedResultString, resultTrim);
+ assertEquals(UTF8String.fromString(expected), result);
// Test that the order of the trims is not important.
- assertEquals(resultTrimLeftRight.toString(), resultTrim);
- assertEquals(resultTrimRightLeft.toString(), resultTrim);
- }
-
- private void assertStringTrimLeft(
- String collation,
- String sourceString,
- String trimString,
- String expectedResultString) throws SparkException {
- // Prepare the input and expected result.
- int collationId = CollationFactory.collationNameToId(collation);
- UTF8String src = UTF8String.fromString(sourceString);
- UTF8String trim = UTF8String.fromString(trimString);
- String result;
-
- if (trimString == null) {
- // Trim string is ASCII space.
- result = CollationSupport.StringTrimLeft.exec(src).toString();
- } else {
- // Trim string is specified.
- result = CollationSupport.StringTrimLeft.exec(src, trim, collationId).toString();
- }
-
- // Test that StringTrimLeft result is as expected.
- assertEquals(expectedResultString, result);
- }
-
- private void assertStringTrimRight(
- String collation,
- String sourceString,
- String trimString,
- String expectedResultString) throws SparkException {
- // Prepare the input and expected result.
- int collationId = CollationFactory.collationNameToId(collation);
- UTF8String src = UTF8String.fromString(sourceString);
- UTF8String trim = UTF8String.fromString(trimString);
- String result;
-
- if (trimString == null) {
- // Trim string is ASCII space.
- result = CollationSupport.StringTrimRight.exec(src).toString();
- } else {
- // Trim string is specified.
- result = CollationSupport.StringTrimRight.exec(src, trim, collationId).toString();
- }
-
- // Test that StringTrimRight result is as expected.
- assertEquals(expectedResultString, result);
+ assertEquals(resultTrimLeftRight, result);
+ assertEquals(resultTrimRightLeft, result);
}
@Test
public void testStringTrim() throws SparkException {
- // Basic tests - UTF8_BINARY.
+ // Basic tests.
assertStringTrim("UTF8_BINARY", "", "", "");
assertStringTrim("UTF8_BINARY", "", "xyz", "");
assertStringTrim("UTF8_BINARY", "asd", "", "asd");
@@ -1315,25 +2785,6 @@ public void testStringTrim() throws SparkException {
assertStringTrim("UTF8_BINARY", "asd", "x", "asd");
assertStringTrim("UTF8_BINARY", "xxasdxx", "x", "asd");
assertStringTrim("UTF8_BINARY", "xa世ax", "x", "a世a");
- assertStringTrimLeft("UTF8_BINARY", "", "", "");
- assertStringTrimLeft("UTF8_BINARY", "", "xyz", "");
- assertStringTrimLeft("UTF8_BINARY", "asd", "", "asd");
- assertStringTrimLeft("UTF8_BINARY", "asd", null, "asd");
- assertStringTrimLeft("UTF8_BINARY", " asd ", null, "asd ");
- assertStringTrimLeft("UTF8_BINARY", " a世a ", null, "a世a ");
- assertStringTrimLeft("UTF8_BINARY", "asd", "x", "asd");
- assertStringTrimLeft("UTF8_BINARY", "xxasdxx", "x", "asdxx");
- assertStringTrimLeft("UTF8_BINARY", "xa世ax", "x", "a世ax");
- assertStringTrimRight("UTF8_BINARY", "", "", "");
- assertStringTrimRight("UTF8_BINARY", "", "xyz", "");
- assertStringTrimRight("UTF8_BINARY", "asd", "", "asd");
- assertStringTrimRight("UTF8_BINARY", "asd", null, "asd");
- assertStringTrimRight("UTF8_BINARY", " asd ", null, " asd");
- assertStringTrimRight("UTF8_BINARY", " a世a ", null, " a世a");
- assertStringTrimRight("UTF8_BINARY", "asd", "x", "asd");
- assertStringTrimRight("UTF8_BINARY", "xxasdxx", "x", "xxasd");
- assertStringTrimRight("UTF8_BINARY", "xa世ax", "x", "xa世a");
- // Basic tests - UTF8_LCASE.
assertStringTrim("UTF8_LCASE", "", "", "");
assertStringTrim("UTF8_LCASE", "", "xyz", "");
assertStringTrim("UTF8_LCASE", "asd", "", "asd");
@@ -1343,25 +2794,6 @@ public void testStringTrim() throws SparkException {
assertStringTrim("UTF8_LCASE", "asd", "x", "asd");
assertStringTrim("UTF8_LCASE", "xxasdxx", "x", "asd");
assertStringTrim("UTF8_LCASE", "xa世ax", "x", "a世a");
- assertStringTrimLeft("UTF8_LCASE", "", "", "");
- assertStringTrimLeft("UTF8_LCASE", "", "xyz", "");
- assertStringTrimLeft("UTF8_LCASE", "asd", "", "asd");
- assertStringTrimLeft("UTF8_LCASE", "asd", null, "asd");
- assertStringTrimLeft("UTF8_LCASE", " asd ", null, "asd ");
- assertStringTrimLeft("UTF8_LCASE", " a世a ", null, "a世a ");
- assertStringTrimLeft("UTF8_LCASE", "asd", "x", "asd");
- assertStringTrimLeft("UTF8_LCASE", "xxasdxx", "x", "asdxx");
- assertStringTrimLeft("UTF8_LCASE", "xa世ax", "x", "a世ax");
- assertStringTrimRight("UTF8_LCASE", "", "", "");
- assertStringTrimRight("UTF8_LCASE", "", "xyz", "");
- assertStringTrimRight("UTF8_LCASE", "asd", "", "asd");
- assertStringTrimRight("UTF8_LCASE", "asd", null, "asd");
- assertStringTrimRight("UTF8_LCASE", " asd ", null, " asd");
- assertStringTrimRight("UTF8_LCASE", " a世a ", null, " a世a");
- assertStringTrimRight("UTF8_LCASE", "asd", "x", "asd");
- assertStringTrimRight("UTF8_LCASE", "xxasdxx", "x", "xxasd");
- assertStringTrimRight("UTF8_LCASE", "xa世ax", "x", "xa世a");
- // Basic tests - UNICODE.
assertStringTrim("UNICODE", "", "", "");
assertStringTrim("UNICODE", "", "xyz", "");
assertStringTrim("UNICODE", "asd", "", "asd");
@@ -1371,25 +2803,6 @@ public void testStringTrim() throws SparkException {
assertStringTrim("UNICODE", "asd", "x", "asd");
assertStringTrim("UNICODE", "xxasdxx", "x", "asd");
assertStringTrim("UNICODE", "xa世ax", "x", "a世a");
- assertStringTrimLeft("UNICODE", "", "", "");
- assertStringTrimLeft("UNICODE", "", "xyz", "");
- assertStringTrimLeft("UNICODE", "asd", "", "asd");
- assertStringTrimLeft("UNICODE", "asd", null, "asd");
- assertStringTrimLeft("UNICODE", " asd ", null, "asd ");
- assertStringTrimLeft("UNICODE", " a世a ", null, "a世a ");
- assertStringTrimLeft("UNICODE", "asd", "x", "asd");
- assertStringTrimLeft("UNICODE", "xxasdxx", "x", "asdxx");
- assertStringTrimLeft("UNICODE", "xa世ax", "x", "a世ax");
- assertStringTrimRight("UNICODE", "", "", "");
- assertStringTrimRight("UNICODE", "", "xyz", "");
- assertStringTrimRight("UNICODE", "asd", "", "asd");
- assertStringTrimRight("UNICODE", "asd", null, "asd");
- assertStringTrimRight("UNICODE", " asd ", null, " asd");
- assertStringTrimRight("UNICODE", " a世a ", null, " a世a");
- assertStringTrimRight("UNICODE", "asd", "x", "asd");
- assertStringTrimRight("UNICODE", "xxasdxx", "x", "xxasd");
- assertStringTrimRight("UNICODE", "xa世ax", "x", "xa世a");
- // Basic tests - UNICODE_CI.
assertStringTrim("UNICODE_CI", "", "", "");
assertStringTrim("UNICODE_CI", "", "xyz", "");
assertStringTrim("UNICODE_CI", "asd", "", "asd");
@@ -1399,98 +2812,44 @@ public void testStringTrim() throws SparkException {
assertStringTrim("UNICODE_CI", "asd", "x", "asd");
assertStringTrim("UNICODE_CI", "xxasdxx", "x", "asd");
assertStringTrim("UNICODE_CI", "xa世ax", "x", "a世a");
- assertStringTrimLeft("UNICODE_CI", "", "", "");
- assertStringTrimLeft("UNICODE_CI", "", "xyz", "");
- assertStringTrimLeft("UNICODE_CI", "asd", "", "asd");
- assertStringTrimLeft("UNICODE_CI", "asd", null, "asd");
- assertStringTrimLeft("UNICODE_CI", " asd ", null, "asd ");
- assertStringTrimLeft("UNICODE_CI", " a世a ", null, "a世a ");
- assertStringTrimLeft("UNICODE_CI", "asd", "x", "asd");
- assertStringTrimLeft("UNICODE_CI", "xxasdxx", "x", "asdxx");
- assertStringTrimLeft("UNICODE_CI", "xa世ax", "x", "a世ax");
- assertStringTrimRight("UNICODE_CI", "", "", "");
- assertStringTrimRight("UNICODE_CI", "", "xyz", "");
- assertStringTrimRight("UNICODE_CI", "asd", "", "asd");
- assertStringTrimRight("UNICODE_CI", "asd", null, "asd");
- assertStringTrimRight("UNICODE_CI", " asd ", null, " asd");
- assertStringTrimRight("UNICODE_CI", " a世a ", null, " a世a");
- assertStringTrimRight("UNICODE_CI", "asd", "x", "asd");
- assertStringTrimRight("UNICODE_CI", "xxasdxx", "x", "xxasd");
- assertStringTrimRight("UNICODE_CI", "xa世ax", "x", "xa世a");
-
- // Case variation - UTF8_BINARY.
+ // Case variation.
assertStringTrim("UTF8_BINARY", "asd", "A", "asd");
assertStringTrim("UTF8_BINARY", "ddsXXXaa", "asd", "XXX");
assertStringTrim("UTF8_BINARY", "ASD", "a", "ASD");
- assertStringTrimLeft("UTF8_BINARY", "ddsXXXaa", "asd", "XXXaa");
- assertStringTrimRight("UTF8_BINARY", "ddsXXXaa", "asd", "ddsXXX");
- // Case variation - UTF8_LCASE.
assertStringTrim("UTF8_LCASE", "asd", "A", "sd");
assertStringTrim("UTF8_LCASE", "ASD", "a", "SD");
assertStringTrim("UTF8_LCASE", "ddsXXXaa", "ASD", "XXX");
- assertStringTrimLeft("UTF8_LCASE", "ddsXXXaa", "aSd", "XXXaa");
- assertStringTrimRight("UTF8_LCASE", "ddsXXXaa", "AsD", "ddsXXX");
- // Case variation - UNICODE.
assertStringTrim("UNICODE", "asd", "A", "asd");
assertStringTrim("UNICODE", "ASD", "a", "ASD");
assertStringTrim("UNICODE", "ddsXXXaa", "asd", "XXX");
- assertStringTrimLeft("UNICODE", "ddsXXXaa", "asd", "XXXaa");
- assertStringTrimRight("UNICODE", "ddsXXXaa", "asd", "ddsXXX");
- // Case variation - UNICODE_CI.
assertStringTrim("UNICODE_CI", "asd", "A", "sd");
assertStringTrim("UNICODE_CI", "ASD", "a", "SD");
assertStringTrim("UNICODE_CI", "ddsXXXaa", "ASD", "XXX");
- assertStringTrimLeft("UNICODE_CI", "ddsXXXaa", "aSd", "XXXaa");
- assertStringTrimRight("UNICODE_CI", "ddsXXXaa", "AsD", "ddsXXX");
-
- // Case-variable character length - UTF8_BINARY.
+ assertStringTrim("SR_CI_AI", "cSCšćČXXXsčšČŠsć", "čš", "XXX");
+ // One-to-many case mapping (e.g. German sharp S).
assertStringTrim("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ");
- assertStringTrimLeft("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ");
- assertStringTrimRight("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ");
assertStringTrim("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß");
- assertStringTrimLeft("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß");
- assertStringTrimRight("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß");
assertStringTrim("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaa");
- assertStringTrimLeft("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaaẞ");
- assertStringTrimRight("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "Ëaaa");
- // Case-variable character length - UTF8_LCASE.
assertStringTrim("UTF8_LCASE", "ẞaaaẞ", "ß", "aaa");
- assertStringTrimLeft("UTF8_LCASE", "ẞaaaẞ", "ß", "aaaẞ");
- assertStringTrimRight("UTF8_LCASE", "ẞaaaẞ", "ß", "ẞaaa");
assertStringTrim("UTF8_LCASE", "ßaaaß", "ẞ", "aaa");
- assertStringTrimLeft("UTF8_LCASE", "ßaaaß", "ẞ", "aaaß");
- assertStringTrimRight("UTF8_LCASE", "ßaaaß", "ẞ", "ßaaa");
assertStringTrim("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaa");
- assertStringTrimLeft("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaaẞ");
- assertStringTrimRight("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "Ëaaa");
- // Case-variable character length - UNICODE.
assertStringTrim("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ");
- assertStringTrimLeft("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ");
- assertStringTrimRight("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ");
assertStringTrim("UNICODE", "ßaaaß", "ẞ", "ßaaaß");
- assertStringTrimLeft("UNICODE", "ßaaaß", "ẞ", "ßaaaß");
- assertStringTrimRight("UNICODE", "ßaaaß", "ẞ", "ßaaaß");
assertStringTrim("UNICODE", "Ëaaaẞ", "Ëẞ", "aaa");
- assertStringTrimLeft("UNICODE", "Ëaaaẞ", "Ëẞ", "aaaẞ");
- assertStringTrimRight("UNICODE", "Ëaaaẞ", "Ëẞ", "Ëaaa");
- // Case-variable character length - UNICODE_CI.
assertStringTrim("UNICODE_CI", "ẞaaaẞ", "ß", "aaa");
- assertStringTrimLeft("UNICODE_CI", "ẞaaaẞ", "ß", "aaaẞ");
- assertStringTrimRight("UNICODE_CI", "ẞaaaẞ", "ß", "ẞaaa");
assertStringTrim("UNICODE_CI", "ßaaaß", "ẞ", "aaa");
- assertStringTrimLeft("UNICODE_CI", "ßaaaß", "ẞ", "aaaß");
- assertStringTrimRight("UNICODE_CI", "ßaaaß", "ẞ", "ßaaa");
assertStringTrim("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "aaa");
- assertStringTrimLeft("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "aaaẞ");
- assertStringTrimRight("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "Ëaaa");
-
- // One-to-many case mapping - UTF8_BINARY.
+ // One-to-many case mapping (e.g. Turkish dotted I).
assertStringTrim("UTF8_BINARY", "i", "i", "");
assertStringTrim("UTF8_BINARY", "iii", "I", "iii");
assertStringTrim("UTF8_BINARY", "I", "iii", "I");
assertStringTrim("UTF8_BINARY", "ixi", "i", "x");
assertStringTrim("UTF8_BINARY", "i", "İ", "i");
assertStringTrim("UTF8_BINARY", "i\u0307", "İ", "i\u0307");
+ assertStringTrim("UTF8_BINARY", "ii\u0307", "İi", "\u0307");
+ assertStringTrim("UTF8_BINARY", "iii\u0307", "İi", "\u0307");
+ assertStringTrim("UTF8_BINARY", "iiii\u0307", "iİ", "\u0307");
+ assertStringTrim("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307");
assertStringTrim("UTF8_BINARY", "i\u0307", "i", "\u0307");
assertStringTrim("UTF8_BINARY", "i\u0307", "\u0307", "i");
assertStringTrim("UTF8_BINARY", "i\u0307", "i\u0307", "");
@@ -1510,63 +2869,16 @@ public void testStringTrim() throws SparkException {
assertStringTrim("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ");
assertStringTrim("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ");
assertStringTrim("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi");
- assertStringTrimLeft("UTF8_BINARY", "i", "i", "");
- assertStringTrimLeft("UTF8_BINARY", "iii", "I", "iii");
- assertStringTrimLeft("UTF8_BINARY", "I", "iii", "I");
- assertStringTrimLeft("UTF8_BINARY", "ixi", "i", "xi");
- assertStringTrimLeft("UTF8_BINARY", "i", "İ", "i");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307", "İ", "i\u0307");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307", "i", "\u0307");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307", "\u0307", "i\u0307");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307", "i\u0307", "");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", "");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307\u0307", "i\u0307", "");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307i", "i\u0307", "");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307İ", "i\u0307", "İ");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307İ");
- assertStringTrimLeft("UTF8_BINARY", "İ", "İ", "");
- assertStringTrimLeft("UTF8_BINARY", "IXi", "İ", "IXi");
- assertStringTrimLeft("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x");
- assertStringTrimLeft("UTF8_BINARY", "i\u0307x", "ix\u0307İ", "");
- assertStringTrimLeft("UTF8_BINARY", "İ", "i", "İ");
- assertStringTrimLeft("UTF8_BINARY", "İ", "\u0307", "İ");
- assertStringTrimLeft("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ");
- assertStringTrimLeft("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ");
- assertStringTrimLeft("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi\u0307");
- assertStringTrimRight("UTF8_BINARY", "i", "i", "");
- assertStringTrimRight("UTF8_BINARY", "iii", "I", "iii");
- assertStringTrimRight("UTF8_BINARY", "I", "iii", "I");
- assertStringTrimRight("UTF8_BINARY", "ixi", "i", "ix");
- assertStringTrimRight("UTF8_BINARY", "i", "İ", "i");
- assertStringTrimRight("UTF8_BINARY", "i\u0307", "İ", "i\u0307");
- assertStringTrimRight("UTF8_BINARY", "i\u0307", "i", "i\u0307");
- assertStringTrimRight("UTF8_BINARY", "i\u0307", "\u0307", "i");
- assertStringTrimRight("UTF8_BINARY", "i\u0307", "i\u0307", "");
- assertStringTrimRight("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", "");
- assertStringTrimRight("UTF8_BINARY", "i\u0307\u0307", "i\u0307", "");
- assertStringTrimRight("UTF8_BINARY", "i\u0307i", "i\u0307", "");
- assertStringTrimRight("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i");
- assertStringTrimRight("UTF8_BINARY", "i\u0307İ", "i\u0307", "i\u0307İ");
- assertStringTrimRight("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307");
- assertStringTrimRight("UTF8_BINARY", "İ", "İ", "");
- assertStringTrimRight("UTF8_BINARY", "IXi", "İ", "IXi");
- assertStringTrimRight("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307");
- assertStringTrimRight("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x");
- assertStringTrimRight("UTF8_BINARY", "i\u0307x", "ix\u0307İ", "");
- assertStringTrimRight("UTF8_BINARY", "İ", "i", "İ");
- assertStringTrimRight("UTF8_BINARY", "İ", "\u0307", "İ");
- assertStringTrimRight("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ");
- assertStringTrimRight("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ");
- assertStringTrimRight("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi");
- // One-to-many case mapping - UTF8_LCASE.
assertStringTrim("UTF8_LCASE", "i", "i", "");
assertStringTrim("UTF8_LCASE", "iii", "I", "");
assertStringTrim("UTF8_LCASE", "I", "iii", "");
assertStringTrim("UTF8_LCASE", "ixi", "i", "x");
assertStringTrim("UTF8_LCASE", "i", "İ", "i");
assertStringTrim("UTF8_LCASE", "i\u0307", "İ", "");
+ assertStringTrim("UTF8_LCASE", "ii\u0307", "İi", "");
+ assertStringTrim("UTF8_LCASE", "iii\u0307", "İi", "");
+ assertStringTrim("UTF8_LCASE", "iiii\u0307", "iİ", "");
+ assertStringTrim("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", "");
assertStringTrim("UTF8_LCASE", "i\u0307", "i", "\u0307");
assertStringTrim("UTF8_LCASE", "i\u0307", "\u0307", "i");
assertStringTrim("UTF8_LCASE", "i\u0307", "i\u0307", "");
@@ -1586,63 +2898,16 @@ public void testStringTrim() throws SparkException {
assertStringTrim("UTF8_LCASE", "Ixİ", "i\u0307", "xİ");
assertStringTrim("UTF8_LCASE", "IXİ", "ix\u0307", "İ");
assertStringTrim("UTF8_LCASE", "xi\u0307", "\u0307IX", "");
- assertStringTrimLeft("UTF8_LCASE", "i", "i", "");
- assertStringTrimLeft("UTF8_LCASE", "iii", "I", "");
- assertStringTrimLeft("UTF8_LCASE", "I", "iii", "");
- assertStringTrimLeft("UTF8_LCASE", "ixi", "i", "xi");
- assertStringTrimLeft("UTF8_LCASE", "i", "İ", "i");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307", "İ", "");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307", "i", "\u0307");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307", "\u0307", "i\u0307");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307", "i\u0307", "");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", "");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307\u0307", "i\u0307", "");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307i", "i\u0307", "");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307i", "İ", "i");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307İ", "i\u0307", "İ");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307İ", "İ", "");
- assertStringTrimLeft("UTF8_LCASE", "İ", "İ", "");
- assertStringTrimLeft("UTF8_LCASE", "IXi", "İ", "IXi");
- assertStringTrimLeft("UTF8_LCASE", "ix\u0307", "Ixİ", "\u0307");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307x", "IXİ", "");
- assertStringTrimLeft("UTF8_LCASE", "i\u0307x", "I\u0307xİ", "");
- assertStringTrimLeft("UTF8_LCASE", "İ", "i", "İ");
- assertStringTrimLeft("UTF8_LCASE", "İ", "\u0307", "İ");
- assertStringTrimLeft("UTF8_LCASE", "Ixİ", "i\u0307", "xİ");
- assertStringTrimLeft("UTF8_LCASE", "IXİ", "ix\u0307", "İ");
- assertStringTrimLeft("UTF8_LCASE", "xi\u0307", "\u0307IX", "");
- assertStringTrimRight("UTF8_LCASE", "i", "i", "");
- assertStringTrimRight("UTF8_LCASE", "iii", "I", "");
- assertStringTrimRight("UTF8_LCASE", "I", "iii", "");
- assertStringTrimRight("UTF8_LCASE", "ixi", "i", "ix");
- assertStringTrimRight("UTF8_LCASE", "i", "İ", "i");
- assertStringTrimRight("UTF8_LCASE", "i\u0307", "İ", "");
- assertStringTrimRight("UTF8_LCASE", "i\u0307", "i", "i\u0307");
- assertStringTrimRight("UTF8_LCASE", "i\u0307", "\u0307", "i");
- assertStringTrimRight("UTF8_LCASE", "i\u0307", "i\u0307", "");
- assertStringTrimRight("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", "");
- assertStringTrimRight("UTF8_LCASE", "i\u0307\u0307", "i\u0307", "");
- assertStringTrimRight("UTF8_LCASE", "i\u0307i", "i\u0307", "");
- assertStringTrimRight("UTF8_LCASE", "i\u0307i", "İ", "i\u0307i");
- assertStringTrimRight("UTF8_LCASE", "i\u0307İ", "i\u0307", "i\u0307İ");
- assertStringTrimRight("UTF8_LCASE", "i\u0307İ", "İ", "");
- assertStringTrimRight("UTF8_LCASE", "İ", "İ", "");
- assertStringTrimRight("UTF8_LCASE", "IXi", "İ", "IXi");
- assertStringTrimRight("UTF8_LCASE", "ix\u0307", "Ixİ", "ix\u0307");
- assertStringTrimRight("UTF8_LCASE", "i\u0307x", "IXİ", "");
- assertStringTrimRight("UTF8_LCASE", "i\u0307x", "I\u0307xİ", "");
- assertStringTrimRight("UTF8_LCASE", "İ", "i", "İ");
- assertStringTrimRight("UTF8_LCASE", "İ", "\u0307", "İ");
- assertStringTrimRight("UTF8_LCASE", "Ixİ", "i\u0307", "Ixİ");
- assertStringTrimRight("UTF8_LCASE", "IXİ", "ix\u0307", "IXİ");
- assertStringTrimRight("UTF8_LCASE", "xi\u0307", "\u0307IX", "");
- // One-to-many case mapping - UNICODE.
assertStringTrim("UNICODE", "i", "i", "");
assertStringTrim("UNICODE", "iii", "I", "iii");
assertStringTrim("UNICODE", "I", "iii", "I");
assertStringTrim("UNICODE", "ixi", "i", "x");
assertStringTrim("UNICODE", "i", "İ", "i");
assertStringTrim("UNICODE", "i\u0307", "İ", "i\u0307");
+ assertStringTrim("UNICODE", "ii\u0307", "İi", "i\u0307");
+ assertStringTrim("UNICODE", "iii\u0307", "İi", "i\u0307");
+ assertStringTrim("UNICODE", "iiii\u0307", "iİ", "i\u0307");
+ assertStringTrim("UNICODE", "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307");
assertStringTrim("UNICODE", "i\u0307", "i", "i\u0307");
assertStringTrim("UNICODE", "i\u0307", "\u0307", "i\u0307");
assertStringTrim("UNICODE", "i\u0307", "i\u0307", "i\u0307");
@@ -1663,65 +2928,16 @@ public void testStringTrim() throws SparkException {
assertStringTrim("UNICODE", "Ixİ", "i\u0307", "Ixİ");
assertStringTrim("UNICODE", "IXİ", "ix\u0307", "IXİ");
assertStringTrim("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307");
- assertStringTrimLeft("UNICODE", "i", "i", "");
- assertStringTrimLeft("UNICODE", "iii", "I", "iii");
- assertStringTrimLeft("UNICODE", "I", "iii", "I");
- assertStringTrimLeft("UNICODE", "ixi", "i", "xi");
- assertStringTrimLeft("UNICODE", "i", "İ", "i");
- assertStringTrimLeft("UNICODE", "i\u0307", "İ", "i\u0307");
- assertStringTrimLeft("UNICODE", "i\u0307", "i", "i\u0307");
- assertStringTrimLeft("UNICODE", "i\u0307", "\u0307", "i\u0307");
- assertStringTrimLeft("UNICODE", "i\u0307", "i\u0307", "i\u0307");
- assertStringTrimLeft("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307");
- assertStringTrimLeft("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307");
- assertStringTrimLeft("UNICODE", "i\u0307i", "i\u0307", "i\u0307i");
- assertStringTrimLeft("UNICODE", "i\u0307i", "İ", "i\u0307i");
- assertStringTrimLeft("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ");
- assertStringTrimLeft("UNICODE", "i\u0307İ", "İ", "i\u0307İ");
- assertStringTrimLeft("UNICODE", "İ", "İ", "");
- assertStringTrimLeft("UNICODE", "IXi", "İ", "IXi");
- assertStringTrimLeft("UNICODE", "ix\u0307", "Ixİ", "ix\u0307");
- assertStringTrimLeft("UNICODE", "i\u0307x", "IXİ", "i\u0307x");
- assertStringTrimLeft("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307x");
- assertStringTrimLeft("UNICODE", "İ", "i", "İ");
- assertStringTrimLeft("UNICODE", "İ", "\u0307", "İ");
- assertStringTrimLeft("UNICODE", "i\u0307", "i\u0307", "i\u0307");
- assertStringTrimLeft("UNICODE", "Ixİ", "i\u0307", "Ixİ");
- assertStringTrimLeft("UNICODE", "IXİ", "ix\u0307", "IXİ");
- assertStringTrimLeft("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307");
- assertStringTrimRight("UNICODE", "i", "i", "");
- assertStringTrimRight("UNICODE", "iii", "I", "iii");
- assertStringTrimRight("UNICODE", "I", "iii", "I");
- assertStringTrimRight("UNICODE", "ixi", "i", "ix");
- assertStringTrimRight("UNICODE", "i", "İ", "i");
- assertStringTrimRight("UNICODE", "i\u0307", "İ", "i\u0307");
- assertStringTrimRight("UNICODE", "i\u0307", "i", "i\u0307");
- assertStringTrimRight("UNICODE", "i\u0307", "\u0307", "i\u0307");
- assertStringTrimRight("UNICODE", "i\u0307", "i\u0307", "i\u0307");
- assertStringTrimRight("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307");
- assertStringTrimRight("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307");
- assertStringTrimRight("UNICODE", "i\u0307i", "i\u0307", "i\u0307");
- assertStringTrimRight("UNICODE", "i\u0307i", "İ", "i\u0307i");
- assertStringTrimRight("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ");
- assertStringTrimRight("UNICODE", "i\u0307İ", "İ", "i\u0307");
- assertStringTrimRight("UNICODE", "İ", "İ", "");
- assertStringTrimRight("UNICODE", "IXi", "İ", "IXi");
- assertStringTrimRight("UNICODE", "ix\u0307", "Ixİ", "ix\u0307");
- assertStringTrimRight("UNICODE", "i\u0307x", "IXİ", "i\u0307x");
- assertStringTrimRight("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307");
- assertStringTrimRight("UNICODE", "İ", "i", "İ");
- assertStringTrimRight("UNICODE", "İ", "\u0307", "İ");
- assertStringTrimRight("UNICODE", "i\u0307", "i\u0307", "i\u0307");
- assertStringTrimRight("UNICODE", "Ixİ", "i\u0307", "Ixİ");
- assertStringTrimRight("UNICODE", "IXİ", "ix\u0307", "IXİ");
- assertStringTrimRight("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307");
- // One-to-many case mapping - UNICODE_CI.
assertStringTrim("UNICODE_CI", "i", "i", "");
assertStringTrim("UNICODE_CI", "iii", "I", "");
assertStringTrim("UNICODE_CI", "I", "iii", "");
assertStringTrim("UNICODE_CI", "ixi", "i", "x");
assertStringTrim("UNICODE_CI", "i", "İ", "i");
assertStringTrim("UNICODE_CI", "i\u0307", "İ", "");
+ assertStringTrim("UNICODE_CI", "ii\u0307", "İi", "");
+ assertStringTrim("UNICODE_CI", "iii\u0307", "İi", "");
+ assertStringTrim("UNICODE_CI", "iiii\u0307", "iİ", "");
+ assertStringTrim("UNICODE_CI", "ii\u0307ii\u0307", "iİ", "");
assertStringTrim("UNICODE_CI", "i\u0307", "i", "i\u0307");
assertStringTrim("UNICODE_CI", "i\u0307", "\u0307", "i\u0307");
assertStringTrim("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307");
@@ -1742,12 +2958,282 @@ public void testStringTrim() throws SparkException {
assertStringTrim("UNICODE_CI", "Ixİ", "i\u0307", "xİ");
assertStringTrim("UNICODE_CI", "IXİ", "ix\u0307", "İ");
assertStringTrim("UNICODE_CI", "xi\u0307", "\u0307IX", "i\u0307");
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertStringTrim("UTF8_BINARY", "ςxς", "σ", "ςxς");
+ assertStringTrim("UTF8_BINARY", "ςxς", "ς", "x");
+ assertStringTrim("UTF8_BINARY", "ςxς", "Σ", "ςxς");
+ assertStringTrim("UTF8_BINARY", "σxσ", "σ", "x");
+ assertStringTrim("UTF8_BINARY", "σxσ", "ς", "σxσ");
+ assertStringTrim("UTF8_BINARY", "σxσ", "Σ", "σxσ");
+ assertStringTrim("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ");
+ assertStringTrim("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ");
+ assertStringTrim("UTF8_BINARY", "ΣxΣ", "Σ", "x");
+ assertStringTrim("UTF8_LCASE", "ςxς", "σ", "x");
+ assertStringTrim("UTF8_LCASE", "ςxς", "ς", "x");
+ assertStringTrim("UTF8_LCASE", "ςxς", "Σ", "x");
+ assertStringTrim("UTF8_LCASE", "σxσ", "σ", "x");
+ assertStringTrim("UTF8_LCASE", "σxσ", "ς", "x");
+ assertStringTrim("UTF8_LCASE", "σxσ", "Σ", "x");
+ assertStringTrim("UTF8_LCASE", "ΣxΣ", "σ", "x");
+ assertStringTrim("UTF8_LCASE", "ΣxΣ", "ς", "x");
+ assertStringTrim("UTF8_LCASE", "ΣxΣ", "Σ", "x");
+ assertStringTrim("UNICODE", "ςxς", "σ", "ςxς");
+ assertStringTrim("UNICODE", "ςxς", "ς", "x");
+ assertStringTrim("UNICODE", "ςxς", "Σ", "ςxς");
+ assertStringTrim("UNICODE", "σxσ", "σ", "x");
+ assertStringTrim("UNICODE", "σxσ", "ς", "σxσ");
+ assertStringTrim("UNICODE", "σxσ", "Σ", "σxσ");
+ assertStringTrim("UNICODE", "ΣxΣ", "σ", "ΣxΣ");
+ assertStringTrim("UNICODE", "ΣxΣ", "ς", "ΣxΣ");
+ assertStringTrim("UNICODE", "ΣxΣ", "Σ", "x");
+ assertStringTrim("UNICODE_CI", "ςxς", "σ", "x");
+ assertStringTrim("UNICODE_CI", "ςxς", "ς", "x");
+ assertStringTrim("UNICODE_CI", "ςxς", "Σ", "x");
+ assertStringTrim("UNICODE_CI", "σxσ", "σ", "x");
+ assertStringTrim("UNICODE_CI", "σxσ", "ς", "x");
+ assertStringTrim("UNICODE_CI", "σxσ", "Σ", "x");
+ assertStringTrim("UNICODE_CI", "ΣxΣ", "σ", "x");
+ assertStringTrim("UNICODE_CI", "ΣxΣ", "ς", "x");
+ assertStringTrim("UNICODE_CI", "ΣxΣ", "Σ", "x");
+ // Unicode normalization.
+ assertStringTrim("UTF8_BINARY", "åβγδa\u030A", "å", "βγδa\u030A");
+ assertStringTrim("UTF8_LCASE", "åβγδa\u030A", "Å", "βγδa\u030A");
+ assertStringTrim("UNICODE", "åβγδa\u030A", "å", "βγδ");
+ assertStringTrim("UNICODE_CI", "åβγδa\u030A", "Å", "βγδ");
+ // Surrogate pairs.
+ assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrim("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrim("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "ac", "🙃b🙃");
+ assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "ac", "🙃b🙃");
+ assertStringTrim("UNICODE", "a🙃b🙃c", "ac", "🙃b🙃");
+ assertStringTrim("UNICODE_CI", "a🙃b🙃c", "ac", "🙃b🙃");
+ assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "a🙃c", "b");
+ assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "a🙃c", "b");
+ assertStringTrim("UNICODE", "a🙃b🙃c", "a🙃c", "b");
+ assertStringTrim("UNICODE_CI", "a🙃b🙃c", "a🙃c", "b");
+ assertStringTrim("UTF8_BINARY", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrim("UTF8_LCASE", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrim("UNICODE", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrim("UNICODE_CI", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😀😄", "😆😃");
+ assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😀😄", "😆😃");
+ assertStringTrim("UNICODE", "😀😆😃😄", "😀😄", "😆😃");
+ assertStringTrim("UNICODE_CI", "😀😆😃😄", "😀😄", "😆😃");
+ assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😃😄", "😀😆");
+ assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😃😄", "😀😆");
+ assertStringTrim("UNICODE", "😀😆😃😄", "😃😄", "😀😆");
+ assertStringTrim("UNICODE_CI", "😀😆😃😄", "😃😄", "😀😆");
+ assertStringTrim("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrim("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrim("UNICODE", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrim("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrim("UTF8_BINARY", "𐐅", "𐐅", "");
+ assertStringTrim("UTF8_LCASE", "𐐅", "𐐅", "");
+ assertStringTrim("UNICODE", "𐐅", "𐐅", "");
+ assertStringTrim("UNICODE_CI", "𐐅", "𐐅", "");
+ assertStringTrim("UTF8_BINARY", "𐐅", "𐐭", "𐐅");
+ assertStringTrim("UTF8_LCASE", "𐐅", "𐐭", "");
+ assertStringTrim("UNICODE", "𐐅", "𐐭", "𐐅");
+ assertStringTrim("UNICODE_CI", "𐐅", "𐐭", "");
+ assertStringTrim("UTF8_BINARY", "𝔸", "𝔸", "");
+ assertStringTrim("UTF8_LCASE", "𝔸", "𝔸", "");
+ assertStringTrim("UNICODE", "𝔸", "𝔸", "");
+ assertStringTrim("UNICODE_CI", "𝔸", "𝔸", "");
+ assertStringTrim("UTF8_BINARY", "𝔸", "A", "𝔸");
+ assertStringTrim("UTF8_LCASE", "𝔸", "A", "𝔸");
+ assertStringTrim("UNICODE", "𝔸", "A", "𝔸");
+ assertStringTrim("UNICODE_CI", "𝔸", "A", "");
+ assertStringTrim("UTF8_BINARY", "𝔸", "a", "𝔸");
+ assertStringTrim("UTF8_LCASE", "𝔸", "a", "𝔸");
+ assertStringTrim("UNICODE", "𝔸", "a", "𝔸");
+ assertStringTrim("UNICODE_CI", "𝔸", "a", "");
+ }
+
+ /**
+ * Verify the behaviour of the `StringTrimLeft` collation support class.
+ */
+
+ private void assertStringTrimLeft(String collationName, String sourceString, String trimString,
+ String expected) throws SparkException {
+ // Prepare the input and expected result.
+ int collationId = CollationFactory.collationNameToId(collationName);
+ UTF8String src = UTF8String.fromString(sourceString);
+ UTF8String trim = UTF8String.fromString(trimString);
+ UTF8String result;
+
+ if (trimString == null) {
+ // Trim string is ASCII space.
+ result = CollationSupport.StringTrimLeft.exec(src);
+ } else {
+ // Trim string is specified.
+ result = CollationSupport.StringTrimLeft.exec(src, trim, collationId);
+ }
+
+ // Test that StringTrimLeft result is as expected.
+ assertEquals(UTF8String.fromString(expected), result);
+ }
+
+ @Test
+ public void testStringTrimLeft() throws SparkException {
+ // Basic tests - UTF8_BINARY.
+ assertStringTrimLeft("UTF8_BINARY", "", "", "");
+ assertStringTrimLeft("UTF8_BINARY", "", "xyz", "");
+ assertStringTrimLeft("UTF8_BINARY", "asd", "", "asd");
+ assertStringTrimLeft("UTF8_BINARY", "asd", null, "asd");
+ assertStringTrimLeft("UTF8_BINARY", " asd ", null, "asd ");
+ assertStringTrimLeft("UTF8_BINARY", " a世a ", null, "a世a ");
+ assertStringTrimLeft("UTF8_BINARY", "asd", "x", "asd");
+ assertStringTrimLeft("UTF8_BINARY", "xxasdxx", "x", "asdxx");
+ assertStringTrimLeft("UTF8_BINARY", "xa世ax", "x", "a世ax");
+ // Basic tests - UTF8_LCASE.
+ assertStringTrimLeft("UTF8_LCASE", "", "", "");
+ assertStringTrimLeft("UTF8_LCASE", "", "xyz", "");
+ assertStringTrimLeft("UTF8_LCASE", "asd", "", "asd");
+ assertStringTrimLeft("UTF8_LCASE", "asd", null, "asd");
+ assertStringTrimLeft("UTF8_LCASE", " asd ", null, "asd ");
+ assertStringTrimLeft("UTF8_LCASE", " a世a ", null, "a世a ");
+ assertStringTrimLeft("UTF8_LCASE", "asd", "x", "asd");
+ assertStringTrimLeft("UTF8_LCASE", "xxasdxx", "x", "asdxx");
+ assertStringTrimLeft("UTF8_LCASE", "xa世ax", "x", "a世ax");
+ // Basic tests - UNICODE.
+ assertStringTrimLeft("UNICODE", "", "", "");
+ assertStringTrimLeft("UNICODE", "", "xyz", "");
+ assertStringTrimLeft("UNICODE", "asd", "", "asd");
+ assertStringTrimLeft("UNICODE", "asd", null, "asd");
+ assertStringTrimLeft("UNICODE", " asd ", null, "asd ");
+ assertStringTrimLeft("UNICODE", " a世a ", null, "a世a ");
+ assertStringTrimLeft("UNICODE", "asd", "x", "asd");
+ assertStringTrimLeft("UNICODE", "xxasdxx", "x", "asdxx");
+ assertStringTrimLeft("UNICODE", "xa世ax", "x", "a世ax");
+ // Basic tests - UNICODE_CI.
+ assertStringTrimLeft("UNICODE_CI", "", "", "");
+ assertStringTrimLeft("UNICODE_CI", "", "xyz", "");
+ assertStringTrimLeft("UNICODE_CI", "asd", "", "asd");
+ assertStringTrimLeft("UNICODE_CI", "asd", null, "asd");
+ assertStringTrimLeft("UNICODE_CI", " asd ", null, "asd ");
+ assertStringTrimLeft("UNICODE_CI", " a世a ", null, "a世a ");
+ assertStringTrimLeft("UNICODE_CI", "asd", "x", "asd");
+ assertStringTrimLeft("UNICODE_CI", "xxasdxx", "x", "asdxx");
+ assertStringTrimLeft("UNICODE_CI", "xa世ax", "x", "a世ax");
+ // Case variation.
+ assertStringTrimLeft("UTF8_BINARY", "ddsXXXaa", "asd", "XXXaa");
+ assertStringTrimLeft("UTF8_LCASE", "ddsXXXaa", "aSd", "XXXaa");
+ assertStringTrimLeft("UNICODE", "ddsXXXaa", "asd", "XXXaa");
+ assertStringTrimLeft("UNICODE_CI", "ddsXXXaa", "aSd", "XXXaa");
+ // One-to-many case mapping (e.g. German sharp S).
+ assertStringTrimLeft("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ");
+ assertStringTrimLeft("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß");
+ assertStringTrimLeft("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "aaaẞ");
+ assertStringTrimLeft("UTF8_LCASE", "ẞaaaẞ", "ß", "aaaẞ");
+ assertStringTrimLeft("UTF8_LCASE", "ßaaaß", "ẞ", "aaaß");
+ assertStringTrimLeft("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "aaaẞ");
+ assertStringTrimLeft("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ");
+ assertStringTrimLeft("UNICODE", "ßaaaß", "ẞ", "ßaaaß");
+ assertStringTrimLeft("UNICODE", "Ëaaaẞ", "Ëẞ", "aaaẞ");
+ assertStringTrimLeft("UNICODE_CI", "ẞaaaẞ", "ß", "aaaẞ");
+ assertStringTrimLeft("UNICODE_CI", "ßaaaß", "ẞ", "aaaß");
+ assertStringTrimLeft("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "aaaẞ");
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertStringTrimLeft("UTF8_BINARY", "i", "i", "");
+ assertStringTrimLeft("UTF8_BINARY", "iii", "I", "iii");
+ assertStringTrimLeft("UTF8_BINARY", "I", "iii", "I");
+ assertStringTrimLeft("UTF8_BINARY", "ixi", "i", "xi");
+ assertStringTrimLeft("UTF8_BINARY", "i", "İ", "i");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307", "İ", "i\u0307");
+ assertStringTrimLeft("UTF8_BINARY", "ii\u0307", "İi", "\u0307");
+ assertStringTrimLeft("UTF8_BINARY", "iii\u0307", "İi", "\u0307");
+ assertStringTrimLeft("UTF8_BINARY", "iiii\u0307", "iİ", "\u0307");
+ assertStringTrimLeft("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "\u0307ii\u0307");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307", "i", "\u0307");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307", "\u0307", "i\u0307");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307", "i\u0307", "");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", "");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307\u0307", "i\u0307", "");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307i", "i\u0307", "");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307İ", "i\u0307", "İ");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307İ");
+ assertStringTrimLeft("UTF8_BINARY", "İ", "İ", "");
+ assertStringTrimLeft("UTF8_BINARY", "IXi", "İ", "IXi");
+ assertStringTrimLeft("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x");
+ assertStringTrimLeft("UTF8_BINARY", "i\u0307x", "ix\u0307İ", "");
+ assertStringTrimLeft("UTF8_BINARY", "İ", "i", "İ");
+ assertStringTrimLeft("UTF8_BINARY", "İ", "\u0307", "İ");
+ assertStringTrimLeft("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ");
+ assertStringTrimLeft("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ");
+ assertStringTrimLeft("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi\u0307");
+ assertStringTrimLeft("UTF8_LCASE", "i", "i", "");
+ assertStringTrimLeft("UTF8_LCASE", "iii", "I", "");
+ assertStringTrimLeft("UTF8_LCASE", "I", "iii", "");
+ assertStringTrimLeft("UTF8_LCASE", "ixi", "i", "xi");
+ assertStringTrimLeft("UTF8_LCASE", "i", "İ", "i");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307", "İ", "");
+ assertStringTrimLeft("UTF8_LCASE", "ii\u0307", "İi", "");
+ assertStringTrimLeft("UTF8_LCASE", "iii\u0307", "İi", "");
+ assertStringTrimLeft("UTF8_LCASE", "iiii\u0307", "iİ", "");
+ assertStringTrimLeft("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", "");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307", "i", "\u0307");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307", "\u0307", "i\u0307");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307", "i\u0307", "");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", "");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307\u0307", "i\u0307", "");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307i", "i\u0307", "");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307i", "İ", "i");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307İ", "i\u0307", "İ");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307İ", "İ", "");
+ assertStringTrimLeft("UTF8_LCASE", "İ", "İ", "");
+ assertStringTrimLeft("UTF8_LCASE", "IXi", "İ", "IXi");
+ assertStringTrimLeft("UTF8_LCASE", "ix\u0307", "Ixİ", "\u0307");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307x", "IXİ", "");
+ assertStringTrimLeft("UTF8_LCASE", "i\u0307x", "I\u0307xİ", "");
+ assertStringTrimLeft("UTF8_LCASE", "İ", "i", "İ");
+ assertStringTrimLeft("UTF8_LCASE", "İ", "\u0307", "İ");
+ assertStringTrimLeft("UTF8_LCASE", "Ixİ", "i\u0307", "xİ");
+ assertStringTrimLeft("UTF8_LCASE", "IXİ", "ix\u0307", "İ");
+ assertStringTrimLeft("UTF8_LCASE", "xi\u0307", "\u0307IX", "");
+ assertStringTrimLeft("UNICODE", "i", "i", "");
+ assertStringTrimLeft("UNICODE", "iii", "I", "iii");
+ assertStringTrimLeft("UNICODE", "I", "iii", "I");
+ assertStringTrimLeft("UNICODE", "ixi", "i", "xi");
+ assertStringTrimLeft("UNICODE", "i", "İ", "i");
+ assertStringTrimLeft("UNICODE", "i\u0307", "İ", "i\u0307");
+ assertStringTrimLeft("UNICODE", "ii\u0307", "İi", "i\u0307");
+ assertStringTrimLeft("UNICODE", "iii\u0307", "İi", "i\u0307");
+ assertStringTrimLeft("UNICODE", "iiii\u0307", "iİ", "i\u0307");
+ assertStringTrimLeft("UNICODE", "ii\u0307ii\u0307", "iİ", "i\u0307ii\u0307");
+ assertStringTrimLeft("UNICODE", "i\u0307", "i", "i\u0307");
+ assertStringTrimLeft("UNICODE", "i\u0307", "\u0307", "i\u0307");
+ assertStringTrimLeft("UNICODE", "i\u0307", "i\u0307", "i\u0307");
+ assertStringTrimLeft("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307");
+ assertStringTrimLeft("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307");
+ assertStringTrimLeft("UNICODE", "i\u0307i", "i\u0307", "i\u0307i");
+ assertStringTrimLeft("UNICODE", "i\u0307i", "İ", "i\u0307i");
+ assertStringTrimLeft("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ");
+ assertStringTrimLeft("UNICODE", "i\u0307İ", "İ", "i\u0307İ");
+ assertStringTrimLeft("UNICODE", "İ", "İ", "");
+ assertStringTrimLeft("UNICODE", "IXi", "İ", "IXi");
+ assertStringTrimLeft("UNICODE", "ix\u0307", "Ixİ", "ix\u0307");
+ assertStringTrimLeft("UNICODE", "i\u0307x", "IXİ", "i\u0307x");
+ assertStringTrimLeft("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307x");
+ assertStringTrimLeft("UNICODE", "İ", "i", "İ");
+ assertStringTrimLeft("UNICODE", "İ", "\u0307", "İ");
+ assertStringTrimLeft("UNICODE", "i\u0307", "i\u0307", "i\u0307");
+ assertStringTrimLeft("UNICODE", "Ixİ", "i\u0307", "Ixİ");
+ assertStringTrimLeft("UNICODE", "IXİ", "ix\u0307", "IXİ");
+ assertStringTrimLeft("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307");
assertStringTrimLeft("UNICODE_CI", "i", "i", "");
assertStringTrimLeft("UNICODE_CI", "iii", "I", "");
assertStringTrimLeft("UNICODE_CI", "I", "iii", "");
assertStringTrimLeft("UNICODE_CI", "ixi", "i", "xi");
assertStringTrimLeft("UNICODE_CI", "i", "İ", "i");
assertStringTrimLeft("UNICODE_CI", "i\u0307", "İ", "");
+ assertStringTrimLeft("UNICODE_CI", "ii\u0307", "İi", "");
+ assertStringTrimLeft("UNICODE_CI", "iii\u0307", "İi", "");
+ assertStringTrimLeft("UNICODE_CI", "iiii\u0307", "iİ", "");
+ assertStringTrimLeft("UNICODE_CI", "ii\u0307ii\u0307", "iİ", "");
assertStringTrimLeft("UNICODE_CI", "i\u0307", "i", "i\u0307");
assertStringTrimLeft("UNICODE_CI", "i\u0307", "\u0307", "i\u0307");
assertStringTrimLeft("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307");
@@ -1768,12 +3254,283 @@ public void testStringTrim() throws SparkException {
assertStringTrimLeft("UNICODE_CI", "Ixİ", "i\u0307", "xİ");
assertStringTrimLeft("UNICODE_CI", "IXİ", "ix\u0307", "İ");
assertStringTrimLeft("UNICODE_CI", "xi\u0307", "\u0307IX", "i\u0307");
+ // Conditional case mapping (e.g. Greek sigmas).
+ assertStringTrimLeft("UTF8_BINARY", "ςxς", "σ", "ςxς");
+ assertStringTrimLeft("UTF8_BINARY", "ςxς", "ς", "xς");
+ assertStringTrimLeft("UTF8_BINARY", "ςxς", "Σ", "ςxς");
+ assertStringTrimLeft("UTF8_BINARY", "σxσ", "σ", "xσ");
+ assertStringTrimLeft("UTF8_BINARY", "σxσ", "ς", "σxσ");
+ assertStringTrimLeft("UTF8_BINARY", "σxσ", "Σ", "σxσ");
+ assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ");
+ assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ");
+ assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "Σ", "xΣ");
+ assertStringTrimLeft("UTF8_LCASE", "ςxς", "σ", "xς");
+ assertStringTrimLeft("UTF8_LCASE", "ςxς", "ς", "xς");
+ assertStringTrimLeft("UTF8_LCASE", "ςxς", "Σ", "xς");
+ assertStringTrimLeft("UTF8_LCASE", "σxσ", "σ", "xσ");
+ assertStringTrimLeft("UTF8_LCASE", "σxσ", "ς", "xσ");
+ assertStringTrimLeft("UTF8_LCASE", "σxσ", "Σ", "xσ");
+ assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "σ", "xΣ");
+ assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "ς", "xΣ");
+ assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "Σ", "xΣ");
+ assertStringTrimLeft("UNICODE", "ςxς", "σ", "ςxς");
+ assertStringTrimLeft("UNICODE", "ςxς", "ς", "xς");
+ assertStringTrimLeft("UNICODE", "ςxς", "Σ", "ςxς");
+ assertStringTrimLeft("UNICODE", "σxσ", "σ", "xσ");
+ assertStringTrimLeft("UNICODE", "σxσ", "ς", "σxσ");
+ assertStringTrimLeft("UNICODE", "σxσ", "Σ", "σxσ");
+ assertStringTrimLeft("UNICODE", "ΣxΣ", "σ", "ΣxΣ");
+ assertStringTrimLeft("UNICODE", "ΣxΣ", "ς", "ΣxΣ");
+ assertStringTrimLeft("UNICODE", "ΣxΣ", "Σ", "xΣ");
+ assertStringTrimLeft("UNICODE_CI", "ςxς", "σ", "xς");
+ assertStringTrimLeft("UNICODE_CI", "ςxς", "ς", "xς");
+ assertStringTrimLeft("UNICODE_CI", "ςxς", "Σ", "xς");
+ assertStringTrimLeft("UNICODE_CI", "σxσ", "σ", "xσ");
+ assertStringTrimLeft("UNICODE_CI", "σxσ", "ς", "xσ");
+ assertStringTrimLeft("UNICODE_CI", "σxσ", "Σ", "xσ");
+ assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "σ", "xΣ");
+ assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "ς", "xΣ");
+ assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "Σ", "xΣ");
+ // Unicode normalization.
+ assertStringTrimLeft("UTF8_BINARY", "åβγδa\u030A", "å", "βγδa\u030A");
+ assertStringTrimLeft("UTF8_LCASE", "åβγδa\u030A", "Å", "βγδa\u030A");
+ assertStringTrimLeft("UNICODE", "åβγδa\u030A", "å", "βγδa\u030A");
+ assertStringTrimLeft("UNICODE_CI", "åβγδa\u030A", "Å", "βγδa\u030A");
+ // Surrogate pairs.
+ assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrimLeft("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a", "🙃b🙃c");
+ assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a", "🙃b🙃c");
+ assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a", "🙃b🙃c");
+ assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a", "🙃b🙃c");
+ assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a🙃", "b🙃c");
+ assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a🙃", "b🙃c");
+ assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a🙃", "b🙃c");
+ assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a🙃", "b🙃c");
+ assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "a🙃b", "c");
+ assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "a🙃b", "c");
+ assertStringTrimLeft("UNICODE", "a🙃b🙃c", "a🙃b", "c");
+ assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "a🙃b", "c");
+ assertStringTrimLeft("UTF8_BINARY", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrimLeft("UTF8_LCASE", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrimLeft("UNICODE", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrimLeft("UNICODE_CI", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😆😃", "😀😆😃😄");
+ assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😆😃", "😀😆😃😄");
+ assertStringTrimLeft("UNICODE", "😀😆😃😄", "😆😃", "😀😆😃😄");
+ assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😆😃", "😀😆😃😄");
+ assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😀😆", "😃😄");
+ assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😀😆", "😃😄");
+ assertStringTrimLeft("UNICODE", "😀😆😃😄", "😀😆", "😃😄");
+ assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😀😆", "😃😄");
+ assertStringTrimLeft("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrimLeft("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrimLeft("UNICODE", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrimLeft("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrimLeft("UTF8_BINARY", "𐐅", "𐐅", "");
+ assertStringTrimLeft("UTF8_LCASE", "𐐅", "𐐅", "");
+ assertStringTrimLeft("UNICODE", "𐐅", "𐐅", "");
+ assertStringTrimLeft("UNICODE_CI", "𐐅", "𐐅", "");
+ assertStringTrimLeft("UTF8_BINARY", "𐐅", "𐐭", "𐐅");
+ assertStringTrimLeft("UTF8_LCASE", "𐐅", "𐐭", "");
+ assertStringTrimLeft("UNICODE", "𐐅", "𐐭", "𐐅");
+ assertStringTrimLeft("UNICODE_CI", "𐐅", "𐐭", "");
+ assertStringTrimLeft("UTF8_BINARY", "𝔸", "𝔸", "");
+ assertStringTrimLeft("UTF8_LCASE", "𝔸", "𝔸", "");
+ assertStringTrimLeft("UNICODE", "𝔸", "𝔸", "");
+ assertStringTrimLeft("UNICODE_CI", "𝔸", "𝔸", "");
+ assertStringTrimLeft("UTF8_BINARY", "𝔸", "A", "𝔸");
+ assertStringTrimLeft("UTF8_LCASE", "𝔸", "A", "𝔸");
+ assertStringTrimLeft("UNICODE", "𝔸", "A", "𝔸");
+ assertStringTrimLeft("UNICODE_CI", "𝔸", "A", "");
+ assertStringTrimLeft("UTF8_BINARY", "𝔸", "a", "𝔸");
+ assertStringTrimLeft("UTF8_LCASE", "𝔸", "a", "𝔸");
+ assertStringTrimLeft("UNICODE", "𝔸", "a", "𝔸");
+ assertStringTrimLeft("UNICODE_CI", "𝔸", "a", "");
+ }
+
+ /**
+ * Verify the behaviour of the `StringTrimRight` collation support class.
+ */
+
+ private void assertStringTrimRight(String collationName, String sourceString, String trimString,
+ String expected) throws SparkException {
+ // Prepare the input and expected result.
+ int collationId = CollationFactory.collationNameToId(collationName);
+ UTF8String src = UTF8String.fromString(sourceString);
+ UTF8String trim = UTF8String.fromString(trimString);
+ UTF8String result;
+
+ if (trimString == null) {
+ // Trim string is ASCII space.
+ result = CollationSupport.StringTrimRight.exec(src);
+ } else {
+ // Trim string is specified.
+ result = CollationSupport.StringTrimRight.exec(src, trim, collationId);
+ }
+
+ // Test that StringTrimRight result is as expected.
+ assertEquals(UTF8String.fromString(expected), result);
+ }
+
+ @Test
+ public void testStringTrimRight() throws SparkException {
+ // Basic tests.
+ assertStringTrimRight("UTF8_BINARY", "", "", "");
+ assertStringTrimRight("UTF8_BINARY", "", "xyz", "");
+ assertStringTrimRight("UTF8_BINARY", "asd", "", "asd");
+ assertStringTrimRight("UTF8_BINARY", "asd", null, "asd");
+ assertStringTrimRight("UTF8_BINARY", " asd ", null, " asd");
+ assertStringTrimRight("UTF8_BINARY", " a世a ", null, " a世a");
+ assertStringTrimRight("UTF8_BINARY", "asd", "x", "asd");
+ assertStringTrimRight("UTF8_BINARY", "xxasdxx", "x", "xxasd");
+ assertStringTrimRight("UTF8_BINARY", "xa世ax", "x", "xa世a");
+ assertStringTrimRight("UTF8_LCASE", "", "", "");
+ assertStringTrimRight("UTF8_LCASE", "", "xyz", "");
+ assertStringTrimRight("UTF8_LCASE", "asd", "", "asd");
+ assertStringTrimRight("UTF8_LCASE", "asd", null, "asd");
+ assertStringTrimRight("UTF8_LCASE", " asd ", null, " asd");
+ assertStringTrimRight("UTF8_LCASE", " a世a ", null, " a世a");
+ assertStringTrimRight("UTF8_LCASE", "asd", "x", "asd");
+ assertStringTrimRight("UTF8_LCASE", "xxasdxx", "x", "xxasd");
+ assertStringTrimRight("UTF8_LCASE", "xa世ax", "x", "xa世a");
+ assertStringTrimRight("UNICODE", "", "", "");
+ assertStringTrimRight("UNICODE", "", "xyz", "");
+ assertStringTrimRight("UNICODE", "asd", "", "asd");
+ assertStringTrimRight("UNICODE", "asd", null, "asd");
+ assertStringTrimRight("UNICODE", " asd ", null, " asd");
+ assertStringTrimRight("UNICODE", " a世a ", null, " a世a");
+ assertStringTrimRight("UNICODE", "asd", "x", "asd");
+ assertStringTrimRight("UNICODE", "xxasdxx", "x", "xxasd");
+ assertStringTrimRight("UNICODE", "xa世ax", "x", "xa世a");
+ assertStringTrimRight("UNICODE_CI", "", "", "");
+ assertStringTrimRight("UNICODE_CI", "", "xyz", "");
+ assertStringTrimRight("UNICODE_CI", "asd", "", "asd");
+ assertStringTrimRight("UNICODE_CI", "asd", null, "asd");
+ assertStringTrimRight("UNICODE_CI", " asd ", null, " asd");
+ assertStringTrimRight("UNICODE_CI", " a世a ", null, " a世a");
+ assertStringTrimRight("UNICODE_CI", "asd", "x", "asd");
+ assertStringTrimRight("UNICODE_CI", "xxasdxx", "x", "xxasd");
+ assertStringTrimRight("UNICODE_CI", "xa世ax", "x", "xa世a");
+ // Case variation.
+ assertStringTrimRight("UTF8_BINARY", "ddsXXXaa", "asd", "ddsXXX");
+ assertStringTrimRight("UTF8_LCASE", "ddsXXXaa", "AsD", "ddsXXX");
+ assertStringTrimRight("UNICODE", "ddsXXXaa", "asd", "ddsXXX");
+ assertStringTrimRight("UNICODE_CI", "ddsXXXaa", "AsD", "ddsXXX");
+ // One-to-many case mapping (e.g. German sharp S).
+ assertStringTrimRight("UTF8_BINARY", "ẞaaaẞ", "ß", "ẞaaaẞ");
+ assertStringTrimRight("UTF8_BINARY", "ßaaaß", "ẞ", "ßaaaß");
+ assertStringTrimRight("UTF8_BINARY", "Ëaaaẞ", "Ëẞ", "Ëaaa");
+ assertStringTrimRight("UTF8_LCASE", "ẞaaaẞ", "ß", "ẞaaa");
+ assertStringTrimRight("UTF8_LCASE", "ßaaaß", "ẞ", "ßaaa");
+ assertStringTrimRight("UTF8_LCASE", "Ëaaaẞ", "Ëẞ", "Ëaaa");
+ assertStringTrimRight("UNICODE", "ẞaaaẞ", "ß", "ẞaaaẞ");
+ assertStringTrimRight("UNICODE", "ßaaaß", "ẞ", "ßaaaß");
+ assertStringTrimRight("UNICODE", "Ëaaaẞ", "Ëẞ", "Ëaaa");
+ assertStringTrimRight("UNICODE_CI", "ẞaaaẞ", "ß", "ẞaaa");
+ assertStringTrimRight("UNICODE_CI", "ßaaaß", "ẞ", "ßaaa");
+ assertStringTrimRight("UNICODE_CI", "Ëaaaẞ", "Ëẞ", "Ëaaa");
+ // One-to-many case mapping (e.g. Turkish dotted I).
+ assertStringTrimRight("UTF8_BINARY", "i", "i", "");
+ assertStringTrimRight("UTF8_BINARY", "iii", "I", "iii");
+ assertStringTrimRight("UTF8_BINARY", "I", "iii", "I");
+ assertStringTrimRight("UTF8_BINARY", "ixi", "i", "ix");
+ assertStringTrimRight("UTF8_BINARY", "i", "İ", "i");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307", "İ", "i\u0307");
+ assertStringTrimRight("UTF8_BINARY", "ii\u0307", "İi", "ii\u0307");
+ assertStringTrimRight("UTF8_BINARY", "iii\u0307", "İi", "iii\u0307");
+ assertStringTrimRight("UTF8_BINARY", "iiii\u0307", "iİ", "iiii\u0307");
+ assertStringTrimRight("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307", "i", "i\u0307");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307", "\u0307", "i");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307", "i\u0307", "");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307i\u0307", "i\u0307", "");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307\u0307", "i\u0307", "");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307i", "i\u0307", "");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307i", "İ", "i\u0307i");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307İ", "i\u0307", "i\u0307İ");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307İ", "İ", "i\u0307");
+ assertStringTrimRight("UTF8_BINARY", "İ", "İ", "");
+ assertStringTrimRight("UTF8_BINARY", "IXi", "İ", "IXi");
+ assertStringTrimRight("UTF8_BINARY", "ix\u0307", "Ixİ", "ix\u0307");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307x", "IXİ", "i\u0307x");
+ assertStringTrimRight("UTF8_BINARY", "i\u0307x", "ix\u0307İ", "");
+ assertStringTrimRight("UTF8_BINARY", "İ", "i", "İ");
+ assertStringTrimRight("UTF8_BINARY", "İ", "\u0307", "İ");
+ assertStringTrimRight("UTF8_BINARY", "Ixİ", "i\u0307", "Ixİ");
+ assertStringTrimRight("UTF8_BINARY", "IXİ", "ix\u0307", "IXİ");
+ assertStringTrimRight("UTF8_BINARY", "xi\u0307", "\u0307IX", "xi");
+ assertStringTrimRight("UTF8_LCASE", "i", "i", "");
+ assertStringTrimRight("UTF8_LCASE", "iii", "I", "");
+ assertStringTrimRight("UTF8_LCASE", "I", "iii", "");
+ assertStringTrimRight("UTF8_LCASE", "ixi", "i", "ix");
+ assertStringTrimRight("UTF8_LCASE", "i", "İ", "i");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307", "İ", "");
+ assertStringTrimRight("UTF8_LCASE", "ii\u0307", "İi", "");
+ assertStringTrimRight("UTF8_LCASE", "iii\u0307", "İi", "");
+ assertStringTrimRight("UTF8_LCASE", "iiii\u0307", "iİ", "");
+ assertStringTrimRight("UTF8_LCASE", "ii\u0307ii\u0307", "iİ", "");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307", "i", "i\u0307");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307", "\u0307", "i");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307", "i\u0307", "");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307i\u0307", "i\u0307", "");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307\u0307", "i\u0307", "");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307i", "i\u0307", "");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307i", "İ", "i\u0307i");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307İ", "i\u0307", "i\u0307İ");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307İ", "İ", "");
+ assertStringTrimRight("UTF8_LCASE", "İ", "İ", "");
+ assertStringTrimRight("UTF8_LCASE", "IXi", "İ", "IXi");
+ assertStringTrimRight("UTF8_LCASE", "ix\u0307", "Ixİ", "ix\u0307");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307x", "IXİ", "");
+ assertStringTrimRight("UTF8_LCASE", "i\u0307x", "I\u0307xİ", "");
+ assertStringTrimRight("UTF8_LCASE", "İ", "i", "İ");
+ assertStringTrimRight("UTF8_LCASE", "İ", "\u0307", "İ");
+ assertStringTrimRight("UTF8_LCASE", "Ixİ", "i\u0307", "Ixİ");
+ assertStringTrimRight("UTF8_LCASE", "IXİ", "ix\u0307", "IXİ");
+ assertStringTrimRight("UTF8_LCASE", "xi\u0307", "\u0307IX", "");
+ assertStringTrimRight("UNICODE", "i", "i", "");
+ assertStringTrimRight("UNICODE", "iii", "I", "iii");
+ assertStringTrimRight("UNICODE", "I", "iii", "I");
+ assertStringTrimRight("UNICODE", "ixi", "i", "ix");
+ assertStringTrimRight("UNICODE", "i", "İ", "i");
+ assertStringTrimRight("UNICODE", "i\u0307", "İ", "i\u0307");
+ assertStringTrimRight("UTF8_BINARY", "ii\u0307", "İi", "ii\u0307");
+ assertStringTrimRight("UTF8_BINARY", "iii\u0307", "İi", "iii\u0307");
+ assertStringTrimRight("UTF8_BINARY", "iiii\u0307", "iİ", "iiii\u0307");
+ assertStringTrimRight("UTF8_BINARY", "ii\u0307ii\u0307", "iİ", "ii\u0307ii\u0307");
+ assertStringTrimRight("UNICODE", "i\u0307", "i", "i\u0307");
+ assertStringTrimRight("UNICODE", "i\u0307", "\u0307", "i\u0307");
+ assertStringTrimRight("UNICODE", "i\u0307", "i\u0307", "i\u0307");
+ assertStringTrimRight("UNICODE", "i\u0307i\u0307", "i\u0307", "i\u0307i\u0307");
+ assertStringTrimRight("UNICODE", "i\u0307\u0307", "i\u0307", "i\u0307\u0307");
+ assertStringTrimRight("UNICODE", "i\u0307i", "i\u0307", "i\u0307");
+ assertStringTrimRight("UNICODE", "i\u0307i", "İ", "i\u0307i");
+ assertStringTrimRight("UNICODE", "i\u0307İ", "i\u0307", "i\u0307İ");
+ assertStringTrimRight("UNICODE", "i\u0307İ", "İ", "i\u0307");
+ assertStringTrimRight("UNICODE", "İ", "İ", "");
+ assertStringTrimRight("UNICODE", "IXi", "İ", "IXi");
+ assertStringTrimRight("UNICODE", "ix\u0307", "Ixİ", "ix\u0307");
+ assertStringTrimRight("UNICODE", "i\u0307x", "IXİ", "i\u0307x");
+ assertStringTrimRight("UNICODE", "i\u0307x", "ix\u0307İ", "i\u0307");
+ assertStringTrimRight("UNICODE", "İ", "i", "İ");
+ assertStringTrimRight("UNICODE", "İ", "\u0307", "İ");
+ assertStringTrimRight("UNICODE", "i\u0307", "i\u0307", "i\u0307");
+ assertStringTrimRight("UNICODE", "Ixİ", "i\u0307", "Ixİ");
+ assertStringTrimRight("UNICODE", "IXİ", "ix\u0307", "IXİ");
+ assertStringTrimRight("UNICODE", "xi\u0307", "\u0307IX", "xi\u0307");
assertStringTrimRight("UNICODE_CI", "i", "i", "");
assertStringTrimRight("UNICODE_CI", "iii", "I", "");
assertStringTrimRight("UNICODE_CI", "I", "iii", "");
assertStringTrimRight("UNICODE_CI", "ixi", "i", "ix");
assertStringTrimRight("UNICODE_CI", "i", "İ", "i");
assertStringTrimRight("UNICODE_CI", "i\u0307", "İ", "");
+ assertStringTrimRight("UNICODE_CI", "ii\u0307", "İi", "");
+ assertStringTrimRight("UNICODE_CI", "iii\u0307", "İi", "");
+ assertStringTrimRight("UNICODE_CI", "iiii\u0307", "iİ", "");
+ assertStringTrimRight("UNICODE_CI", "ii\u0307ii\u0307", "iİ", "");
assertStringTrimRight("UNICODE_CI", "i\u0307", "i", "i\u0307");
assertStringTrimRight("UNICODE_CI", "i\u0307", "\u0307", "i\u0307");
assertStringTrimRight("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307");
@@ -1791,29 +3548,10 @@ public void testStringTrim() throws SparkException {
assertStringTrimRight("UNICODE_CI", "İ", "i", "İ");
assertStringTrimRight("UNICODE_CI", "İ", "\u0307", "İ");
assertStringTrimRight("UNICODE_CI", "i\u0307", "i\u0307", "i\u0307");
- assertStringTrimRight("UNICODE_CI", "Ixİ", "i\u0307", "Ixİ");
- assertStringTrimRight("UNICODE_CI", "IXİ", "ix\u0307", "IXİ");
- assertStringTrimRight("UNICODE_CI", "xi\u0307", "\u0307IX", "xi\u0307");
-
- // Greek sigmas - UTF8_BINARY.
- assertStringTrim("UTF8_BINARY", "ςxς", "σ", "ςxς");
- assertStringTrim("UTF8_BINARY", "ςxς", "ς", "x");
- assertStringTrim("UTF8_BINARY", "ςxς", "Σ", "ςxς");
- assertStringTrim("UTF8_BINARY", "σxσ", "σ", "x");
- assertStringTrim("UTF8_BINARY", "σxσ", "ς", "σxσ");
- assertStringTrim("UTF8_BINARY", "σxσ", "Σ", "σxσ");
- assertStringTrim("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ");
- assertStringTrim("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ");
- assertStringTrim("UTF8_BINARY", "ΣxΣ", "Σ", "x");
- assertStringTrimLeft("UTF8_BINARY", "ςxς", "σ", "ςxς");
- assertStringTrimLeft("UTF8_BINARY", "ςxς", "ς", "xς");
- assertStringTrimLeft("UTF8_BINARY", "ςxς", "Σ", "ςxς");
- assertStringTrimLeft("UTF8_BINARY", "σxσ", "σ", "xσ");
- assertStringTrimLeft("UTF8_BINARY", "σxσ", "ς", "σxσ");
- assertStringTrimLeft("UTF8_BINARY", "σxσ", "Σ", "σxσ");
- assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ");
- assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ");
- assertStringTrimLeft("UTF8_BINARY", "ΣxΣ", "Σ", "xΣ");
+ assertStringTrimRight("UNICODE_CI", "Ixİ", "i\u0307", "Ixİ");
+ assertStringTrimRight("UNICODE_CI", "IXİ", "ix\u0307", "IXİ");
+ assertStringTrimRight("UNICODE_CI", "xi\u0307", "\u0307IX", "xi\u0307");
+ // Conditional case mapping (e.g. Greek sigmas).
assertStringTrimRight("UTF8_BINARY", "ςxς", "σ", "ςxς");
assertStringTrimRight("UTF8_BINARY", "ςxς", "ς", "ςx");
assertStringTrimRight("UTF8_BINARY", "ςxς", "Σ", "ςxς");
@@ -1823,25 +3561,6 @@ public void testStringTrim() throws SparkException {
assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "σ", "ΣxΣ");
assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "ς", "ΣxΣ");
assertStringTrimRight("UTF8_BINARY", "ΣxΣ", "Σ", "Σx");
- // Greek sigmas - UTF8_LCASE.
- assertStringTrim("UTF8_LCASE", "ςxς", "σ", "x");
- assertStringTrim("UTF8_LCASE", "ςxς", "ς", "x");
- assertStringTrim("UTF8_LCASE", "ςxς", "Σ", "x");
- assertStringTrim("UTF8_LCASE", "σxσ", "σ", "x");
- assertStringTrim("UTF8_LCASE", "σxσ", "ς", "x");
- assertStringTrim("UTF8_LCASE", "σxσ", "Σ", "x");
- assertStringTrim("UTF8_LCASE", "ΣxΣ", "σ", "x");
- assertStringTrim("UTF8_LCASE", "ΣxΣ", "ς", "x");
- assertStringTrim("UTF8_LCASE", "ΣxΣ", "Σ", "x");
- assertStringTrimLeft("UTF8_LCASE", "ςxς", "σ", "xς");
- assertStringTrimLeft("UTF8_LCASE", "ςxς", "ς", "xς");
- assertStringTrimLeft("UTF8_LCASE", "ςxς", "Σ", "xς");
- assertStringTrimLeft("UTF8_LCASE", "σxσ", "σ", "xσ");
- assertStringTrimLeft("UTF8_LCASE", "σxσ", "ς", "xσ");
- assertStringTrimLeft("UTF8_LCASE", "σxσ", "Σ", "xσ");
- assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "σ", "xΣ");
- assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "ς", "xΣ");
- assertStringTrimLeft("UTF8_LCASE", "ΣxΣ", "Σ", "xΣ");
assertStringTrimRight("UTF8_LCASE", "ςxς", "σ", "ςx");
assertStringTrimRight("UTF8_LCASE", "ςxς", "ς", "ςx");
assertStringTrimRight("UTF8_LCASE", "ςxς", "Σ", "ςx");
@@ -1851,25 +3570,6 @@ public void testStringTrim() throws SparkException {
assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "σ", "Σx");
assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "ς", "Σx");
assertStringTrimRight("UTF8_LCASE", "ΣxΣ", "Σ", "Σx");
- // Greek sigmas - UNICODE.
- assertStringTrim("UNICODE", "ςxς", "σ", "ςxς");
- assertStringTrim("UNICODE", "ςxς", "ς", "x");
- assertStringTrim("UNICODE", "ςxς", "Σ", "ςxς");
- assertStringTrim("UNICODE", "σxσ", "σ", "x");
- assertStringTrim("UNICODE", "σxσ", "ς", "σxσ");
- assertStringTrim("UNICODE", "σxσ", "Σ", "σxσ");
- assertStringTrim("UNICODE", "ΣxΣ", "σ", "ΣxΣ");
- assertStringTrim("UNICODE", "ΣxΣ", "ς", "ΣxΣ");
- assertStringTrim("UNICODE", "ΣxΣ", "Σ", "x");
- assertStringTrimLeft("UNICODE", "ςxς", "σ", "ςxς");
- assertStringTrimLeft("UNICODE", "ςxς", "ς", "xς");
- assertStringTrimLeft("UNICODE", "ςxς", "Σ", "ςxς");
- assertStringTrimLeft("UNICODE", "σxσ", "σ", "xσ");
- assertStringTrimLeft("UNICODE", "σxσ", "ς", "σxσ");
- assertStringTrimLeft("UNICODE", "σxσ", "Σ", "σxσ");
- assertStringTrimLeft("UNICODE", "ΣxΣ", "σ", "ΣxΣ");
- assertStringTrimLeft("UNICODE", "ΣxΣ", "ς", "ΣxΣ");
- assertStringTrimLeft("UNICODE", "ΣxΣ", "Σ", "xΣ");
assertStringTrimRight("UNICODE", "ςxς", "σ", "ςxς");
assertStringTrimRight("UNICODE", "ςxς", "ς", "ςx");
assertStringTrimRight("UNICODE", "ςxς", "Σ", "ςxς");
@@ -1879,25 +3579,6 @@ public void testStringTrim() throws SparkException {
assertStringTrimRight("UNICODE", "ΣxΣ", "σ", "ΣxΣ");
assertStringTrimRight("UNICODE", "ΣxΣ", "ς", "ΣxΣ");
assertStringTrimRight("UNICODE", "ΣxΣ", "Σ", "Σx");
- // Greek sigmas - UNICODE_CI.
- assertStringTrim("UNICODE_CI", "ςxς", "σ", "x");
- assertStringTrim("UNICODE_CI", "ςxς", "ς", "x");
- assertStringTrim("UNICODE_CI", "ςxς", "Σ", "x");
- assertStringTrim("UNICODE_CI", "σxσ", "σ", "x");
- assertStringTrim("UNICODE_CI", "σxσ", "ς", "x");
- assertStringTrim("UNICODE_CI", "σxσ", "Σ", "x");
- assertStringTrim("UNICODE_CI", "ΣxΣ", "σ", "x");
- assertStringTrim("UNICODE_CI", "ΣxΣ", "ς", "x");
- assertStringTrim("UNICODE_CI", "ΣxΣ", "Σ", "x");
- assertStringTrimLeft("UNICODE_CI", "ςxς", "σ", "xς");
- assertStringTrimLeft("UNICODE_CI", "ςxς", "ς", "xς");
- assertStringTrimLeft("UNICODE_CI", "ςxς", "Σ", "xς");
- assertStringTrimLeft("UNICODE_CI", "σxσ", "σ", "xσ");
- assertStringTrimLeft("UNICODE_CI", "σxσ", "ς", "xσ");
- assertStringTrimLeft("UNICODE_CI", "σxσ", "Σ", "xσ");
- assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "σ", "xΣ");
- assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "ς", "xΣ");
- assertStringTrimLeft("UNICODE_CI", "ΣxΣ", "Σ", "xΣ");
assertStringTrimRight("UNICODE_CI", "ςxς", "σ", "ςx");
assertStringTrimRight("UNICODE_CI", "ςxς", "ς", "ςx");
assertStringTrimRight("UNICODE_CI", "ςxς", "Σ", "ςx");
@@ -1907,186 +3588,287 @@ public void testStringTrim() throws SparkException {
assertStringTrimRight("UNICODE_CI", "ΣxΣ", "σ", "Σx");
assertStringTrimRight("UNICODE_CI", "ΣxΣ", "ς", "Σx");
assertStringTrimRight("UNICODE_CI", "ΣxΣ", "Σ", "Σx");
-
- // Unicode normalization - UTF8_BINARY.
- assertStringTrim("UTF8_BINARY", "åβγδa\u030A", "å", "βγδa\u030A");
- assertStringTrimLeft("UTF8_BINARY", "åβγδa\u030A", "å", "βγδa\u030A");
+ // Unicode normalization.
assertStringTrimRight("UTF8_BINARY", "åβγδa\u030A", "å", "åβγδa\u030A");
- // Unicode normalization - UTF8_LCASE.
- assertStringTrim("UTF8_LCASE", "åβγδa\u030A", "Å", "βγδa\u030A");
- assertStringTrimLeft("UTF8_LCASE", "åβγδa\u030A", "Å", "βγδa\u030A");
assertStringTrimRight("UTF8_LCASE", "åβγδa\u030A", "Å", "åβγδa\u030A");
- // Unicode normalization - UNICODE.
- assertStringTrim("UNICODE", "åβγδa\u030A", "å", "βγδ");
- assertStringTrimLeft("UNICODE", "åβγδa\u030A", "å", "βγδa\u030A");
assertStringTrimRight("UNICODE", "åβγδa\u030A", "å", "åβγδ");
- // Unicode normalization - UNICODE_CI.
- assertStringTrim("UNICODE_CI", "åβγδa\u030A", "Å", "βγδ");
- assertStringTrimLeft("UNICODE_CI", "åβγδa\u030A", "Å", "βγδa\u030A");
assertStringTrimRight("UNICODE_CI", "åβγδa\u030A", "Å", "åβγδ");
+ // Surrogate pairs.
+ assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrimRight("UNICODE", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "🙃", "a🙃b🙃c");
+ assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c", "a🙃b🙃");
+ assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c", "a🙃b🙃");
+ assertStringTrimRight("UNICODE", "a🙃b🙃c", "c", "a🙃b🙃");
+ assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c", "a🙃b🙃");
+ assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c🙃", "a🙃b");
+ assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c🙃", "a🙃b");
+ assertStringTrimRight("UNICODE", "a🙃b🙃c", "c🙃", "a🙃b");
+ assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c🙃", "a🙃b");
+ assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "c🙃b", "a");
+ assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "c🙃b", "a");
+ assertStringTrimRight("UNICODE", "a🙃b🙃c", "c🙃b", "a");
+ assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "c🙃b", "a");
+ assertStringTrimRight("UTF8_BINARY", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrimRight("UTF8_LCASE", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrimRight("UNICODE", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrimRight("UNICODE_CI", "a🙃b🙃c", "abc🙃", "");
+ assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😆😃", "😀😆😃😄");
+ assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😆😃", "😀😆😃😄");
+ assertStringTrimRight("UNICODE", "😀😆😃😄", "😆😃", "😀😆😃😄");
+ assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😆😃", "😀😆😃😄");
+ assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😃😄", "😀😆");
+ assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😃😄", "😀😆");
+ assertStringTrimRight("UNICODE", "😀😆😃😄", "😃😄", "😀😆");
+ assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😃😄", "😀😆");
+ assertStringTrimRight("UTF8_BINARY", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrimRight("UTF8_LCASE", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrimRight("UNICODE", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrimRight("UNICODE_CI", "😀😆😃😄", "😀😆😃😄", "");
+ assertStringTrimRight("UTF8_BINARY", "𐐅", "𐐅", "");
+ assertStringTrimRight("UTF8_LCASE", "𐐅", "𐐅", "");
+ assertStringTrimRight("UNICODE", "𐐅", "𐐅", "");
+ assertStringTrimRight("UNICODE_CI", "𐐅", "𐐅", "");
+ assertStringTrimRight("UTF8_BINARY", "𐐅", "𐐭", "𐐅");
+ assertStringTrimRight("UTF8_LCASE", "𐐅", "𐐭", "");
+ assertStringTrimRight("UNICODE", "𐐅", "𐐭", "𐐅");
+ assertStringTrimRight("UNICODE_CI", "𐐅", "𐐭", "");
+ assertStringTrimRight("UTF8_BINARY", "𝔸", "𝔸", "");
+ assertStringTrimRight("UTF8_LCASE", "𝔸", "𝔸", "");
+ assertStringTrimRight("UNICODE", "𝔸", "𝔸", "");
+ assertStringTrimRight("UNICODE_CI", "𝔸", "𝔸", "");
+ assertStringTrimRight("UTF8_BINARY", "𝔸", "A", "𝔸");
+ assertStringTrimRight("UTF8_LCASE", "𝔸", "A", "𝔸");
+ assertStringTrimRight("UNICODE", "𝔸", "A", "𝔸");
+ assertStringTrimRight("UNICODE_CI", "𝔸", "A", "");
+ assertStringTrimRight("UTF8_BINARY", "𝔸", "a", "𝔸");
+ assertStringTrimRight("UTF8_LCASE", "𝔸", "a", "𝔸");
+ assertStringTrimRight("UNICODE", "𝔸", "a", "𝔸");
+ assertStringTrimRight("UNICODE_CI", "𝔸", "a", "");
}
- private void assertStringTranslate(
- String inputString,
- String matchingString,
- String replaceString,
- String collationName,
- String expectedResultString) throws SparkException {
+ /**
+ * Verify the behaviour of the `StringTranslate` collation support class.
+ */
+
+ private void assertStringTranslate(String inputString, String matchingString,
+ String replaceString, String collationName, String expected) throws SparkException {
int collationId = CollationFactory.collationNameToId(collationName);
Map dict = buildDict(matchingString, replaceString);
UTF8String source = UTF8String.fromString(inputString);
UTF8String result = CollationSupport.StringTranslate.exec(source, dict, collationId);
- assertEquals(expectedResultString, result.toString());
+ assertEquals(UTF8String.fromString(expected), result);
}
@Test
public void testStringTranslate() throws SparkException {
- // Basic tests - UTF8_BINARY.
+ // Empty strings.
+ assertStringTranslate("", "", "", "UTF8_BINARY", "");
+ assertStringTranslate("", "", "", "UTF8_LCASE", "");
+ assertStringTranslate("", "", "", "UNICODE", "");
+ assertStringTranslate("", "", "", "UNICODE_CI", "");
+ assertStringTranslate("abc", "", "", "UTF8_BINARY", "abc");
+ assertStringTranslate("abc", "", "", "UTF8_LCASE", "abc");
+ assertStringTranslate("abc", "", "", "UNICODE", "abc");
+ assertStringTranslate("abc", "", "", "UNICODE_CI", "abc");
+ assertStringTranslate("", "b", "", "UTF8_BINARY", "");
+ assertStringTranslate("", "b", "", "UTF8_LCASE", "");
+ assertStringTranslate("", "b", "", "UNICODE", "");
+ assertStringTranslate("", "b", "", "UNICODE_CI", "");
+ assertStringTranslate("", "", "x", "UTF8_BINARY", "");
+ assertStringTranslate("", "", "x", "UTF8_LCASE", "");
+ assertStringTranslate("", "", "x", "UNICODE", "");
+ assertStringTranslate("", "", "x", "UNICODE_CI", "");
+ assertStringTranslate("abc", "b", "", "UTF8_BINARY", "ac");
+ assertStringTranslate("abc", "b", "", "UTF8_LCASE", "ac");
+ assertStringTranslate("abc", "b", "", "UNICODE", "ac");
+ assertStringTranslate("abc", "b", "", "UNICODE_CI", "ac");
+ assertStringTranslate("abc", "", "x", "UTF8_BINARY", "abc");
+ assertStringTranslate("abc", "", "x", "UTF8_LCASE", "abc");
+ assertStringTranslate("abc", "", "x", "UNICODE", "abc");
+ assertStringTranslate("abc", "", "x", "UNICODE_CI", "abc");
+ assertStringTranslate("", "b", "x", "UTF8_BINARY", "");
+ assertStringTranslate("", "b", "x", "UTF8_LCASE", "");
+ assertStringTranslate("", "b", "x", "UNICODE", "");
+ assertStringTranslate("", "b", "x", "UNICODE_CI", "");
+ // Basic tests.
+ assertStringTranslate("abc", "b", "x", "UTF8_BINARY", "axc");
+ assertStringTranslate("abc", "b", "x", "UTF8_LCASE", "axc");
+ assertStringTranslate("abc", "b", "x", "UNICODE", "axc");
+ assertStringTranslate("abc", "b", "x", "UNICODE_CI", "axc");
assertStringTranslate("Translate", "Rnlt", "12", "UTF8_BINARY", "Tra2sae");
- assertStringTranslate("Translate", "Rn", "1234", "UTF8_BINARY", "Tra2slate");
- assertStringTranslate("Translate", "Rnlt", "1234", "UTF8_BINARY", "Tra2s3a4e");
- assertStringTranslate("TRanslate", "rnlt", "XxXx", "UTF8_BINARY", "TRaxsXaxe");
- assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UTF8_BINARY", "TxaxsXaxeX");
- assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UTF8_BINARY", "TXaxsXaxex");
- assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UTF8_BINARY", "test大千世AX大千世A");
- assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UTF8_BINARY", "大千世界test大千世界");
- assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UTF8_BINARY", "Oeso大千世界大千世界");
- assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UTF8_BINARY", "大千世界大千世界oesO");
- assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UTF8_BINARY", "世世世界世世世界tesT");
- assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UTF8_BINARY", "Tr4234e");
- assertStringTranslate("Translate", "Rnlt", "123495834634", "UTF8_BINARY", "Tra2s3a4e");
- assertStringTranslate("abcdef", "abcde", "123", "UTF8_BINARY", "123f");
- // Basic tests - UTF8_LCASE.
assertStringTranslate("Translate", "Rnlt", "12", "UTF8_LCASE", "1a2sae");
- assertStringTranslate("Translate", "Rn", "1234", "UTF8_LCASE", "T1a2slate");
- assertStringTranslate("Translate", "Rnlt", "1234", "UTF8_LCASE", "41a2s3a4e");
- assertStringTranslate("TRanslate", "rnlt", "XxXx", "UTF8_LCASE", "xXaxsXaxe");
- assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UTF8_LCASE", "xxaxsXaxex");
- assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UTF8_LCASE", "xXaxsXaxeX");
- assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UTF8_LCASE", "test大千世AB大千世A");
- assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UTF8_LCASE", "大千世界abca大千世界");
- assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UTF8_LCASE", "oeso大千世界大千世界");
- assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UTF8_LCASE", "大千世界大千世界OesO");
- assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UTF8_LCASE", "世世世界世世世界tesT");
- assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UTF8_LCASE", "14234e");
- assertStringTranslate("Translate", "Rnlt", "123495834634", "UTF8_LCASE", "41a2s3a4e");
- assertStringTranslate("abcdef", "abcde", "123", "UTF8_LCASE", "123f");
- // Basic tests - UNICODE.
assertStringTranslate("Translate", "Rnlt", "12", "UNICODE", "Tra2sae");
- assertStringTranslate("Translate", "Rn", "1234", "UNICODE", "Tra2slate");
- assertStringTranslate("Translate", "Rnlt", "1234", "UNICODE", "Tra2s3a4e");
- assertStringTranslate("TRanslate", "rnlt", "XxXx", "UNICODE", "TRaxsXaxe");
- assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UNICODE", "TxaxsXaxeX");
- assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UNICODE", "TXaxsXaxex");
- assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UNICODE", "test大千世AX大千世A");
- assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UNICODE", "大千世界test大千世界");
- assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UNICODE", "Oeso大千世界大千世界");
- assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UNICODE", "大千世界大千世界oesO");
- assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UNICODE", "世世世界世世世界tesT");
- assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UNICODE", "Tr4234e");
- assertStringTranslate("Translate", "Rnlt", "123495834634", "UNICODE", "Tra2s3a4e");
- assertStringTranslate("abcdef", "abcde", "123", "UNICODE", "123f");
- // Basic tests - UNICODE_CI.
assertStringTranslate("Translate", "Rnlt", "12", "UNICODE_CI", "1a2sae");
+ assertStringTranslate("Translate", "Rn", "1234", "UTF8_BINARY", "Tra2slate");
+ assertStringTranslate("Translate", "Rn", "1234", "UTF8_LCASE", "T1a2slate");
+ assertStringTranslate("Translate", "Rn", "1234", "UNICODE", "Tra2slate");
assertStringTranslate("Translate", "Rn", "1234", "UNICODE_CI", "T1a2slate");
+ assertStringTranslate("Translate", "Rnlt", "1234", "UTF8_BINARY", "Tra2s3a4e");
+ assertStringTranslate("Translate", "Rnlt", "1234", "UTF8_LCASE", "41a2s3a4e");
+ assertStringTranslate("Translate", "Rnlt", "1234", "UNICODE", "Tra2s3a4e");
assertStringTranslate("Translate", "Rnlt", "1234", "UNICODE_CI", "41a2s3a4e");
+ assertStringTranslate("TRanslate", "rnlt", "XxXx", "UTF8_BINARY", "TRaxsXaxe");
+ assertStringTranslate("TRanslate", "rnlt", "XxXx", "UTF8_LCASE", "xXaxsXaxe");
+ assertStringTranslate("TRanslate", "rnlt", "XxXx", "UNICODE", "TRaxsXaxe");
assertStringTranslate("TRanslate", "rnlt", "XxXx", "UNICODE_CI", "xXaxsXaxe");
+ assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UTF8_BINARY", "TxaxsXaxeX");
+ assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UTF8_LCASE", "xxaxsXaxex");
+ assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UNICODE", "TxaxsXaxeX");
assertStringTranslate("TRanslater", "Rrnlt", "xXxXx", "UNICODE_CI", "xxaxsXaxex");
+ assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UTF8_BINARY", "TXaxsXaxex");
+ assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UTF8_LCASE", "xXaxsXaxeX");
+ assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UNICODE", "TXaxsXaxex");
assertStringTranslate("TRanslater", "Rrnlt", "XxxXx", "UNICODE_CI", "xXaxsXaxeX");
+ assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UTF8_BINARY", "test大千世AX大千世A");
+ assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UTF8_LCASE", "test大千世AB大千世A");
+ assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UNICODE", "test大千世AX大千世A");
assertStringTranslate("test大千世界X大千世界", "界x", "AB", "UNICODE_CI", "test大千世AB大千世A");
+ assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UTF8_BINARY", "大千世界test大千世界");
+ assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UTF8_LCASE", "大千世界abca大千世界");
+ assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UNICODE", "大千世界test大千世界");
assertStringTranslate("大千世界test大千世界", "TEST", "abcd", "UNICODE_CI", "大千世界abca大千世界");
+ assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UTF8_BINARY", "Oeso大千世界大千世界");
+ assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UTF8_LCASE", "oeso大千世界大千世界");
+ assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UNICODE", "Oeso大千世界大千世界");
assertStringTranslate("Test大千世界大千世界", "tT", "oO", "UNICODE_CI", "oeso大千世界大千世界");
+ assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UTF8_BINARY", "大千世界大千世界oesO");
+ assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UTF8_LCASE", "大千世界大千世界OesO");
+ assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UNICODE", "大千世界大千世界oesO");
assertStringTranslate("大千世界大千世界tesT", "Tt", "Oo", "UNICODE_CI", "大千世界大千世界OesO");
+ assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UTF8_BINARY", "世世世界世世世界tesT");
+ assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UTF8_LCASE", "世世世界世世世界tesT");
+ assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UNICODE", "世世世界世世世界tesT");
assertStringTranslate("大千世界大千世界tesT", "大千", "世世", "UNICODE_CI", "世世世界世世世界tesT");
+ assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UTF8_BINARY", "Tr4234e");
+ assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UTF8_LCASE", "14234e");
+ assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UNICODE", "Tr4234e");
assertStringTranslate("Translate", "Rnlasdfjhgadt", "1234", "UNICODE_CI", "14234e");
+ assertStringTranslate("Translate", "Rnlt", "123495834634", "UTF8_BINARY", "Tra2s3a4e");
+ assertStringTranslate("Translate", "Rnlt", "123495834634", "UTF8_LCASE", "41a2s3a4e");
+ assertStringTranslate("Translate", "Rnlt", "123495834634", "UNICODE", "Tra2s3a4e");
assertStringTranslate("Translate", "Rnlt", "123495834634", "UNICODE_CI", "41a2s3a4e");
+ assertStringTranslate("abcdef", "abcde", "123", "UTF8_BINARY", "123f");
+ assertStringTranslate("abcdef", "abcde", "123", "UTF8_LCASE", "123f");
+ assertStringTranslate("abcdef", "abcde", "123", "UNICODE", "123f");
assertStringTranslate("abcdef", "abcde", "123", "UNICODE_CI", "123f");
-
- // One-to-many case mapping - UTF8_BINARY.
+ assertStringTranslate("abcdëÈêf", "ÊèË", "123", "AF_CI", "abcd321f");
+ // One-to-many case mapping (e.g. Turkish dotted I).
assertStringTranslate("İ", "i\u0307", "xy", "UTF8_BINARY", "İ");
- assertStringTranslate("i\u0307", "İ", "xy", "UTF8_BINARY", "i\u0307");
- assertStringTranslate("i\u030A", "İ", "x", "UTF8_BINARY", "i\u030A");
- assertStringTranslate("i\u030A", "İi", "xy", "UTF8_BINARY", "y\u030A");
- assertStringTranslate("İi\u0307", "İi\u0307", "123", "UTF8_BINARY", "123");
- assertStringTranslate("İi\u0307", "İyz", "123", "UTF8_BINARY", "1i\u0307");
- assertStringTranslate("İi\u0307", "xi\u0307", "123", "UTF8_BINARY", "İ23");
- assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UTF8_BINARY", "12bc3");
- assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UTF8_BINARY", "a2bcå");
- assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", "UTF8_BINARY", "3\u030Aβφδ1\u0307");
- // One-to-many case mapping - UTF8_LCASE.
assertStringTranslate("İ", "i\u0307", "xy", "UTF8_LCASE", "İ");
- assertStringTranslate("i\u0307", "İ", "xy", "UTF8_LCASE", "x");
- assertStringTranslate("i\u030A", "İ", "x", "UTF8_LCASE", "i\u030A");
- assertStringTranslate("i\u030A", "İi", "xy", "UTF8_LCASE", "y\u030A");
- assertStringTranslate("İi\u0307", "İi\u0307", "123", "UTF8_LCASE", "11");
- assertStringTranslate("İi\u0307", "İyz", "123", "UTF8_LCASE", "11");
- assertStringTranslate("İi\u0307", "xi\u0307", "123", "UTF8_LCASE", "İ23");
- assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UTF8_LCASE", "12bc3");
- assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UTF8_LCASE", "12bc3");
- assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", "UTF8_LCASE", "3\u030Aβφδ2");
- // One-to-many case mapping - UNICODE.
assertStringTranslate("İ", "i\u0307", "xy", "UNICODE", "İ");
- assertStringTranslate("i\u0307", "İ", "xy", "UNICODE", "i\u0307");
- assertStringTranslate("i\u030A", "İ", "x", "UNICODE", "i\u030A");
- assertStringTranslate("i\u030A", "İi", "xy", "UNICODE", "i\u030A");
- assertStringTranslate("İi\u0307", "İi\u0307", "123", "UNICODE", "1i\u0307");
- assertStringTranslate("İi\u0307", "İyz", "123", "UNICODE", "1i\u0307");
- assertStringTranslate("İi\u0307", "xi\u0307", "123", "UNICODE", "İi\u0307");
- assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UNICODE", "3bc3");
- assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UNICODE", "a\u030Abcå");
- assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", "UNICODE", "4βφδ2");
- // One-to-many case mapping - UNICODE_CI.
assertStringTranslate("İ", "i\u0307", "xy", "UNICODE_CI", "İ");
+ assertStringTranslate("i\u0307", "İ", "xy", "UTF8_BINARY", "i\u0307");
+ assertStringTranslate("i\u0307", "İ", "xy", "UTF8_LCASE", "x");
+ assertStringTranslate("i\u0307", "İ", "xy", "UNICODE", "i\u0307");
assertStringTranslate("i\u0307", "İ", "xy", "UNICODE_CI", "x");
+ assertStringTranslate("i\u030A", "İ", "x", "UTF8_BINARY", "i\u030A");
+ assertStringTranslate("i\u030A", "İ", "x", "UTF8_LCASE", "i\u030A");
+ assertStringTranslate("i\u030A", "İ", "x", "UNICODE", "i\u030A");
assertStringTranslate("i\u030A", "İ", "x", "UNICODE_CI", "i\u030A");
+ assertStringTranslate("i\u030A", "İi", "xy", "UTF8_BINARY", "y\u030A");
+ assertStringTranslate("i\u030A", "İi", "xy", "UTF8_LCASE", "y\u030A");
+ assertStringTranslate("i\u030A", "İi", "xy", "UNICODE", "i\u030A");
assertStringTranslate("i\u030A", "İi", "xy", "UNICODE_CI", "i\u030A");
+ assertStringTranslate("İi\u0307", "İi\u0307", "123", "UTF8_BINARY", "123");
+ assertStringTranslate("İi\u0307", "İi\u0307", "123", "UTF8_LCASE", "11");
+ assertStringTranslate("İi\u0307", "İi\u0307", "123", "UNICODE", "1i\u0307");
assertStringTranslate("İi\u0307", "İi\u0307", "123", "UNICODE_CI", "11");
+ assertStringTranslate("İi\u0307", "İyz", "123", "UTF8_BINARY", "1i\u0307");
+ assertStringTranslate("İi\u0307", "İyz", "123", "UTF8_LCASE", "11");
+ assertStringTranslate("İi\u0307", "İyz", "123", "UNICODE", "1i\u0307");
assertStringTranslate("İi\u0307", "İyz", "123", "UNICODE_CI", "11");
+ assertStringTranslate("İi\u0307", "xi\u0307", "123", "UTF8_BINARY", "İ23");
+ assertStringTranslate("İi\u0307", "xi\u0307", "123", "UTF8_LCASE", "İ23");
+ assertStringTranslate("İi\u0307", "xi\u0307", "123", "UNICODE", "İi\u0307");
assertStringTranslate("İi\u0307", "xi\u0307", "123", "UNICODE_CI", "İi\u0307");
+ assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UTF8_BINARY", "12bc3");
+ assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UTF8_LCASE", "12bc3");
+ assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UNICODE", "3bc3");
assertStringTranslate("a\u030Abcå", "a\u030Aå", "123", "UNICODE_CI", "3bc3");
+ assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UTF8_BINARY", "a2bcå");
+ assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UTF8_LCASE", "12bc3");
+ assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UNICODE", "a\u030Abcå");
assertStringTranslate("a\u030Abcå", "A\u030AÅ", "123", "UNICODE_CI", "3bc3");
+ assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", "UTF8_BINARY", "3\u030Aβφδ1\u0307");
+ assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", "UTF8_LCASE", "3\u030Aβφδ2");
+ assertStringTranslate("a\u030AβφδI\u0307", "Iİaå", "1234", "UNICODE", "4βφδ2");
assertStringTranslate("A\u030Aβφδi\u0307", "Iİaå", "1234", "UNICODE_CI", "4βφδ2");
-
- // Greek sigmas - UTF8_BINARY.
+ // Conditional case mapping (e.g. Greek sigmas).
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UTF8_BINARY", "σΥσΤΗΜΑΤΙΚΟσ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UTF8_BINARY", "ςΥςΤΗΜΑΤΙΚΟς");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
- assertStringTranslate("συστηματικος", "Συη", "σιι", "UTF8_BINARY", "σιστιματικος");
- assertStringTranslate("συστηματικος", "συη", "σιι", "UTF8_BINARY", "σιστιματικος");
- assertStringTranslate("συστηματικος", "ςυη", "σιι", "UTF8_BINARY", "σιστιματικοσ");
- // Greek sigmas - UTF8_LCASE.
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς");
- assertStringTranslate("συστηματικος", "Συη", "σιι", "UTF8_LCASE", "σιστιματικοσ");
- assertStringTranslate("συστηματικος", "συη", "σιι", "UTF8_LCASE", "σιστιματικοσ");
- assertStringTranslate("συστηματικος", "ςυη", "σιι", "UTF8_LCASE", "σιστιματικοσ");
- // Greek sigmas - UNICODE.
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UNICODE", "σΥσΤΗΜΑΤΙΚΟσ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UNICODE", "ςΥςΤΗΜΑΤΙΚΟς");
- assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
- assertStringTranslate("συστηματικος", "Συη", "σιι", "UNICODE", "σιστιματικος");
- assertStringTranslate("συστηματικος", "συη", "σιι", "UNICODE", "σιστιματικος");
- assertStringTranslate("συστηματικος", "ςυη", "σιι", "UNICODE", "σιστιματικοσ");
- // Greek sigmas - UNICODE_CI.
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UTF8_LCASE", "σισΤιΜΑΤΙΚΟσ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "σιι", "UNICODE_CI", "σισΤιΜΑΤΙΚΟσ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "συη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UTF8_BINARY", "ςΥςΤΗΜΑΤΙΚΟς");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UNICODE", "ςΥςΤΗΜΑΤΙΚΟς");
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "Συη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UTF8_BINARY", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UTF8_LCASE", "ςιςΤιΜΑΤΙΚΟς");
+ assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UNICODE", "ΣΥΣΤΗΜΑΤΙΚΟΣ");
assertStringTranslate("ΣΥΣΤΗΜΑΤΙΚΟΣ", "ςυη", "ςιι", "UNICODE_CI", "ςιςΤιΜΑΤΙΚΟς");
+ assertStringTranslate("συστηματικος", "Συη", "σιι", "UTF8_BINARY", "σιστιματικος");
+ assertStringTranslate("συστηματικος", "Συη", "σιι", "UTF8_LCASE", "σιστιματικοσ");
+ assertStringTranslate("συστηματικος", "Συη", "σιι", "UNICODE", "σιστιματικος");
assertStringTranslate("συστηματικος", "Συη", "σιι", "UNICODE_CI", "σιστιματικοσ");
+ assertStringTranslate("συστηματικος", "συη", "σιι", "UTF8_BINARY", "σιστιματικος");
+ assertStringTranslate("συστηματικος", "συη", "σιι", "UTF8_LCASE", "σιστιματικοσ");
+ assertStringTranslate("συστηματικος", "συη", "σιι", "UNICODE", "σιστιματικος");
assertStringTranslate("συστηματικος", "συη", "σιι", "UNICODE_CI", "σιστιματικοσ");
+ assertStringTranslate("συστηματικος", "ςυη", "σιι", "UTF8_BINARY", "σιστιματικοσ");
+ assertStringTranslate("συστηματικος", "ςυη", "σιι", "UTF8_LCASE", "σιστιματικοσ");
+ assertStringTranslate("συστηματικος", "ςυη", "σιι", "UNICODE", "σιστιματικοσ");
assertStringTranslate("συστηματικος", "ςυη", "σιι", "UNICODE_CI", "σιστιματικοσ");
+ // Surrogate pairs.
+ assertStringTranslate("a🙃b🙃c", "a", "x", "UTF8_BINARY", "x🙃b🙃c");
+ assertStringTranslate("a🙃b🙃c", "a🙃", "xy", "UTF8_BINARY", "xybyc");
+ assertStringTranslate("a🙃b🙃c", "a🙃b", "xyz", "UTF8_BINARY", "xyzyc");
+ assertStringTranslate("a🙃b🙃c", "a🙃bc", "xyzw", "UTF8_BINARY", "xyzyw");
+ assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UTF8_BINARY", "😀😂😃😅");
+ assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UTF8_LCASE", "😀😂😃😅");
+ assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UNICODE", "😀😂😃😅");
+ assertStringTranslate("😀😆😃😄", "😄😆", "😅😂", "UNICODE_CI", "😀😂😃😅");
+ assertStringTranslate("𐐅", "𐐅", "x", "UTF8_BINARY", "x");
+ assertStringTranslate("𐐅", "𐐅", "x", "UTF8_LCASE", "x");
+ assertStringTranslate("𐐅", "𐐅", "x", "UNICODE", "x");
+ assertStringTranslate("𐐅", "𐐅", "x", "UNICODE_CI", "x");
+ assertStringTranslate("𐐅", "𐐭", "x", "UTF8_BINARY", "𐐅");
+ assertStringTranslate("𐐅", "𐐭", "x", "UTF8_LCASE", "x");
+ assertStringTranslate("𐐅", "𐐭", "x", "UNICODE", "𐐅");
+ assertStringTranslate("𐐅", "𐐭", "x", "UNICODE_CI", "x");
+ assertStringTranslate("A", "A", "𐐅", "UTF8_BINARY", "𐐅");
+ assertStringTranslate("A", "A", "𐐅", "UTF8_LCASE", "𐐅");
+ assertStringTranslate("A", "A", "𐐅", "UNICODE", "𐐅");
+ assertStringTranslate("A", "A", "𐐅", "UNICODE_CI", "𐐅");
+ assertStringTranslate("A", "a", "𐐅", "UTF8_BINARY", "A");
+ assertStringTranslate("A", "a", "𐐅", "UTF8_LCASE", "𐐅");
+ assertStringTranslate("A", "a", "𐐅", "UNICODE", "A");
+ assertStringTranslate("A", "a", "𐐅", "UNICODE_CI", "𐐅");
+ assertStringTranslate("a", "A", "𐐅", "UTF8_BINARY", "a");
+ assertStringTranslate("a", "A", "𐐅", "UTF8_LCASE", "𐐅");
+ assertStringTranslate("a", "A", "𐐅", "UNICODE", "a");
+ assertStringTranslate("a", "A", "𐐅", "UNICODE_CI", "𐐅");
+ assertStringTranslate("𝔸", "𝔸", "x", "UTF8_BINARY", "x");
+ assertStringTranslate("𝔸", "𝔸", "x", "UTF8_LCASE", "x");
+ assertStringTranslate("𝔸", "𝔸", "x", "UNICODE", "x");
+ assertStringTranslate("𝔸", "𝔸", "x", "UNICODE_CI", "x");
+ assertStringTranslate("𝔸", "𝕒", "x", "UTF8_BINARY", "𝔸");
+ assertStringTranslate("𝔸", "𝕒", "x", "UTF8_LCASE", "𝔸");
+ assertStringTranslate("𝔸", "𝕒", "x", "UNICODE", "𝔸");
+ assertStringTranslate("𝔸", "𝕒", "x", "UNICODE_CI", "x");
}
private Map buildDict(String matching, String replace) {
diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
index 2428d40fe8016..c4a66fdffdd4d 100644
--- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
+++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java
@@ -26,6 +26,8 @@
import com.google.common.collect.ImmutableMap;
import org.apache.spark.unsafe.Platform;
+import org.apache.spark.unsafe.UTF8StringBuilder;
+
import org.junit.jupiter.api.Test;
import static org.apache.spark.unsafe.types.UTF8String.fromString;
@@ -1362,4 +1364,27 @@ public void toBinaryString() {
UTF8String.fromString("111111111111111111111111111111111111111111111111111111111111111"),
UTF8String.toBinaryString(Long.MAX_VALUE));
}
+
+ /**
+ * This tests whether appending a codepoint to a 'UTF8StringBuilder' correctly appends every
+ * single codepoint. We test it against an already existing 'StringBuilder.appendCodePoint' and
+ * 'UTF8String.fromString'. We skip testing the surrogate codepoints because at some point while
+ * converting the surrogate codepoint to 'UTF8String' (via 'StringBuilder' and 'UTF8String') we
+ * get an ill-formated byte sequence (probably because 'String' is in UTF-16 format, and a single
+ * surrogate codepoint is handled differently in UTF-16 than in UTF-8, so somewhere during those
+ * conversions some different behaviour happens).
+ */
+ @Test
+ public void testAppendCodepointToUTF8StringBuilder() {
+ int surrogateRangeLowerBound = 0xD800;
+ int surrogateRangeUpperBound = 0xDFFF;
+ for (int i = Character.MIN_CODE_POINT; i <= Character.MAX_CODE_POINT; ++i) {
+ if(surrogateRangeLowerBound <= i && i <= surrogateRangeUpperBound) continue;
+ UTF8StringBuilder usb = new UTF8StringBuilder();
+ usb.appendCodePoint(i);
+ StringBuilder sb = new StringBuilder();
+ sb.appendCodePoint(i);
+ assert(usb.build().equals(UTF8String.fromString(sb.toString())));
+ }
+ }
}
diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
index 23dae47f6ff2c..1f64547da7415 100644
--- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
+++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala
@@ -32,28 +32,35 @@ import org.apache.spark.sql.catalyst.util.CollationFactory._
import org.apache.spark.unsafe.types.UTF8String.{fromString => toUTF8}
class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ignore funsuite
+
+ val currentIcuVersion: String = "76.1"
+
test("collationId stability") {
assert(INDETERMINATE_COLLATION_ID == -1)
assert(UTF8_BINARY_COLLATION_ID == 0)
val utf8Binary = fetchCollation(UTF8_BINARY_COLLATION_ID)
assert(utf8Binary.collationName == "UTF8_BINARY")
- assert(utf8Binary.supportsBinaryEquality)
+ assert(utf8Binary.isUtf8BinaryType)
+ assert(utf8Binary.version == currentIcuVersion)
assert(UTF8_LCASE_COLLATION_ID == 1)
- val utf8BinaryLcase = fetchCollation(UTF8_LCASE_COLLATION_ID)
- assert(utf8BinaryLcase.collationName == "UTF8_LCASE")
- assert(!utf8BinaryLcase.supportsBinaryEquality)
+ val utf8Lcase = fetchCollation(UTF8_LCASE_COLLATION_ID)
+ assert(utf8Lcase.collationName == "UTF8_LCASE")
+ assert(!utf8Lcase.isUtf8BinaryType)
+ assert(utf8Lcase.version == currentIcuVersion)
assert(UNICODE_COLLATION_ID == (1 << 29))
val unicode = fetchCollation(UNICODE_COLLATION_ID)
assert(unicode.collationName == "UNICODE")
- assert(!unicode.supportsBinaryEquality)
+ assert(!unicode.isUtf8BinaryType)
+ assert(unicode.version == currentIcuVersion)
assert(UNICODE_CI_COLLATION_ID == ((1 << 29) | (1 << 17)))
val unicodeCi = fetchCollation(UNICODE_CI_COLLATION_ID)
assert(unicodeCi.collationName == "UNICODE_CI")
- assert(!unicodeCi.supportsBinaryEquality)
+ assert(!unicodeCi.isUtf8BinaryType)
+ assert(unicodeCi.version == currentIcuVersion)
}
test("UTF8_BINARY and ICU root locale collation names") {
@@ -93,27 +100,33 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
Seq(
("UTF8_BINARY_CS", "UTF8_BINARY"),
("UTF8_BINARY_AS", "UTF8_BINARY"), // this should be UNICODE_AS
- ("UTF8_BINARY_CS_AS","UTF8_BINARY"), // this should be UNICODE_CS_AS
- ("UTF8_BINARY_AS_CS","UTF8_BINARY"),
- ("UTF8_BINARY_CI","UTF8_BINARY"),
- ("UTF8_BINARY_AI","UTF8_BINARY"),
- ("UTF8_BINARY_CI_AI","UTF8_BINARY"),
- ("UTF8_BINARY_AI_CI","UTF8_BINARY"),
- ("UTF8_BS","UTF8_LCASE"),
- ("BINARY_UTF8","ar_SAU"),
- ("UTF8_BINARY_A","UTF8_BINARY"),
- ("UNICODE_X","UNICODE"),
- ("UNICODE_CI_X","UNICODE"),
- ("UNICODE_LCASE_X","UNICODE"),
- ("UTF8_UNICODE","UTF8_LCASE"),
- ("UTF8_BINARY_UNICODE","UTF8_BINARY"),
+ ("UTF8_BINARY_CS_AS", "UTF8_BINARY"), // this should be UNICODE_CS_AS
+ ("UTF8_BINARY_AS_CS", "UTF8_BINARY"),
+ ("UTF8_BINARY_CI", "UTF8_BINARY"),
+ ("UTF8_BINARY_AI", "UTF8_BINARY"),
+ ("UTF8_BINARY_CI_AI", "UTF8_BINARY"),
+ ("UTF8_BINARY_AI_CI", "UTF8_BINARY"),
+ ("UTF8_BINARY_AI_RTRIM", "UTF8_BINARY_RTRIM"),
+ ("UTF8_BINARY_CI_RTRIM", "UTF8_BINARY_RTRIM"),
+ ("UTF8_BINARY_AI_CI_RTRIM", "UTF8_BINARY_RTRIM"),
+ ("UTF8_BS", "UTF8_LCASE"),
+ ("BINARY_UTF8", "ar_SAU"),
+ ("UTF8_BINARY_A", "UTF8_BINARY"),
+ ("UNICODE_X", "UNICODE"),
+ ("UNICODE_CI_X", "UNICODE"),
+ ("UNICODE_LCASE_X", "UNICODE"),
+ ("UNICODE_RTRIM_LCASE_X", "UNICODE"),
+ ("UTF8_UNICODE", "UTF8_LCASE"),
+ ("UTF8_BINARY_UNICODE", "UTF8_BINARY"),
("CI_UNICODE", "UNICODE"),
("LCASE_UNICODE", "UNICODE"),
+ ("RTRIM_UNICODE", "UNICODE"),
("UNICODE_UNSPECIFIED", "UNICODE"),
("UNICODE_CI_UNSPECIFIED", "UNICODE"),
("UNICODE_UNSPECIFIED_CI_UNSPECIFIED", "UNICODE"),
("UNICODE_INDETERMINATE", "UNICODE"),
- ("UNICODE_CI_INDETERMINATE", "UNICODE")
+ ("UNICODE_CI_INDETERMINATE", "UNICODE"),
+ ("UNICODE_RTRIM_INDETERMINATE", "UNICODE")
).foreach{case (collationName, proposals) =>
checkCollationNameError(collationName, proposals)
}
@@ -127,6 +140,11 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
CollationTestCase("UTF8_BINARY", "aaa", "AAA", false),
CollationTestCase("UTF8_BINARY", "aaa", "bbb", false),
CollationTestCase("UTF8_BINARY", "å", "a\u030A", false),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa", "aaa", true),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa", "aaa ", true),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa ", "aaa ", true),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa", " aaa ", false),
+ CollationTestCase("UTF8_BINARY_RTRIM", " ", " ", true),
CollationTestCase("UTF8_LCASE", "aaa", "aaa", true),
CollationTestCase("UTF8_LCASE", "aaa", "AAA", true),
CollationTestCase("UTF8_LCASE", "aaa", "AaA", true),
@@ -134,15 +152,36 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
CollationTestCase("UTF8_LCASE", "aaa", "aa", false),
CollationTestCase("UTF8_LCASE", "aaa", "bbb", false),
CollationTestCase("UTF8_LCASE", "å", "a\u030A", false),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa", "AaA", true),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa", "AaA ", true),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa ", "AaA ", true),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa", " AaA ", false),
+ CollationTestCase("UTF8_LCASE_RTRIM", " ", " ", true),
CollationTestCase("UNICODE", "aaa", "aaa", true),
CollationTestCase("UNICODE", "aaa", "AAA", false),
CollationTestCase("UNICODE", "aaa", "bbb", false),
CollationTestCase("UNICODE", "å", "a\u030A", true),
+ CollationTestCase("UNICODE_RTRIM", "aaa", "aaa", true),
+ CollationTestCase("UNICODE_RTRIM", "aaa", "aaa ", true),
+ CollationTestCase("UNICODE_RTRIM", "aaa ", "aaa ", true),
+ CollationTestCase("UNICODE_RTRIM", "aaa", " aaa ", false),
+ CollationTestCase("UNICODE_RTRIM", " ", " ", true),
CollationTestCase("UNICODE_CI", "aaa", "aaa", true),
CollationTestCase("UNICODE_CI", "aaa", "AAA", true),
CollationTestCase("UNICODE_CI", "aaa", "bbb", false),
CollationTestCase("UNICODE_CI", "å", "a\u030A", true),
- CollationTestCase("UNICODE_CI", "Å", "a\u030A", true)
+ CollationTestCase("UNICODE_CI", "Å", "a\u030A", true),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa", "AaA", true),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa", "AaA ", true),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa ", "AaA ", true),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa", " AaA ", false),
+ CollationTestCase("UNICODE_RTRIM", " ", " ", true),
+ CollationTestCase("SR_CI", "cČć", "CčĆ", true),
+ CollationTestCase("SR_CI", "cCc", "CčĆ", false),
+ CollationTestCase("SR_CI_AI", "cCc", "CčĆ", true),
+ CollationTestCase("sr_Cyrl_CI", "цЧћ", "ЦчЋ", true),
+ CollationTestCase("sr_Cyrl_CI", "цЦц", "ЦчЋ", false),
+ CollationTestCase("sr_Cyrl_CI_AI", "цЦц", "ЦчЋ", false)
)
checks.foreach(testCase => {
@@ -162,19 +201,50 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
CollationTestCase("UTF8_BINARY", "aaa", "AAA", 1),
CollationTestCase("UTF8_BINARY", "aaa", "bbb", -1),
CollationTestCase("UTF8_BINARY", "aaa", "BBB", 1),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa ", "aaa", 0),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa ", "aaa ", 0),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa ", "bbb", -1),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa ", "bbb ", -1),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa", "BBB" , 1),
+ CollationTestCase("UTF8_BINARY_RTRIM", "aaa ", "BBB " , 1),
+ CollationTestCase("UTF8_BINARY_RTRIM", " ", " " , 0),
CollationTestCase("UTF8_LCASE", "aaa", "aaa", 0),
CollationTestCase("UTF8_LCASE", "aaa", "AAA", 0),
CollationTestCase("UTF8_LCASE", "aaa", "AaA", 0),
CollationTestCase("UTF8_LCASE", "aaa", "AaA", 0),
CollationTestCase("UTF8_LCASE", "aaa", "aa", 1),
CollationTestCase("UTF8_LCASE", "aaa", "bbb", -1),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa ", "AAA", 0),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa ", "AAA ", 0),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa", "bbb ", -1),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa ", "bbb ", -1),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa ", "aa", 1),
+ CollationTestCase("UTF8_LCASE_RTRIM", "aaa ", "aa ", 1),
+ CollationTestCase("UTF8_LCASE_RTRIM", " ", " ", 0),
CollationTestCase("UNICODE", "aaa", "aaa", 0),
CollationTestCase("UNICODE", "aaa", "AAA", -1),
CollationTestCase("UNICODE", "aaa", "bbb", -1),
CollationTestCase("UNICODE", "aaa", "BBB", -1),
+ CollationTestCase("UNICODE_RTRIM", "aaa ", "aaa", 0),
+ CollationTestCase("UNICODE_RTRIM", "aaa ", "aaa ", 0),
+ CollationTestCase("UNICODE_RTRIM", "aaa ", "bbb", -1),
+ CollationTestCase("UNICODE_RTRIM", "aaa ", "bbb ", -1),
+ CollationTestCase("UNICODE_RTRIM", "aaa", "BBB" , -1),
+ CollationTestCase("UNICODE_RTRIM", "aaa ", "BBB " , -1),
+ CollationTestCase("UNICODE_RTRIM", " ", " ", 0),
CollationTestCase("UNICODE_CI", "aaa", "aaa", 0),
CollationTestCase("UNICODE_CI", "aaa", "AAA", 0),
- CollationTestCase("UNICODE_CI", "aaa", "bbb", -1))
+ CollationTestCase("UNICODE_CI", "aaa", "bbb", -1),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa ", "AAA", 0),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa ", "AAA ", 0),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa", "bbb ", -1),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa ", "bbb ", -1),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa ", "aa", 1),
+ CollationTestCase("UNICODE_CI_RTRIM", "aaa ", "aa ", 1),
+ CollationTestCase("UNICODE_CI_RTRIM", " ", " ", 0),
+ CollationTestCase("SR_CI_AI", "cČć", "ČćC", 0),
+ CollationTestCase("SR_CI", "cČć", "ČćC", -1)
+ )
checks.foreach(testCase => {
val collation = fetchCollation(testCase.collationName)
@@ -192,7 +262,10 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
CollationTestCase("UNICODE_CI", "abcde", "abcde", 5),
CollationTestCase("UNICODE_CI", "abcde", "ABCDE", 5),
CollationTestCase("UNICODE_CI", "abcde", "fgh", 0),
- CollationTestCase("UNICODE_CI", "abcde", "FGH", 0)
+ CollationTestCase("UNICODE_CI", "abcde", "FGH", 0),
+ CollationTestCase("SR_CI_AI", "abcčċ", "CCC", 3),
+ CollationTestCase("SR_CI", "abcčċ", "C", 1),
+ CollationTestCase("SR", "abcčċ", "CCC", 0)
)
checks.foreach(testCase => {
@@ -229,7 +302,9 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
"UNICODE_CI",
"UNICODE_AI",
"UNICODE_CI_AI",
- "UNICODE_AI_CI"
+ "UNICODE_AI_CI",
+ "DE_CI_AI",
+ "MT_CI"
).foreach(collationId => {
val col1 = fetchCollation(collationId)
val col2 = fetchCollation(collationId)
@@ -303,15 +378,23 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
("CI_en", "ceb"),
("USA_CI_en", "UNICODE"),
("en_CI_USA", "en_USA"),
+ ("en_RTRIM_USA", "en_USA"),
("CI_sr_Cyrl_SRB", "sr_Cyrl_SRB"),
+ ("RTRIM_sr_Cyrl_SRB", "sr_Cyrl_SRB"),
("sr_CI_Cyrl_SRB", "sr_Cyrl_SRB"),
+ ("sr_RTRIM_Cyrl_SRB", "sr_Cyrl_SRB"),
("sr_Cyrl_CI_SRB", "sr_Cyrl_SRB"),
+ ("sr_Cyrl_RTRIM_SRB", "sr_Cyrl_SRB"),
("CI_Cyrl_sr", "sr_Cyrl_SRB"),
+ ("RTRIM_Cyrl_sr", "sr_Cyrl_SRB"),
("Cyrl_CI_sr", "he_ISR"),
("Cyrl_CI_sr_SRB", "sr_Cyrl_SRB"),
+ ("Cyrl_RTRIM_sr_SRB", "sr_Cyrl_SRB"),
("Cyrl_sr_CI_SRB", "sr_Cyrl_SRB"),
+ ("Cyrl_sr_RTRIM_SRB", "sr_Cyrl_SRB"),
// no locale specified
("_CI_AI", "af_CI_AI, am_CI_AI, ar_CI_AI"),
+ ("_CI_AI_RTRIM", "af_CI_AI_RTRIM, am_CI_AI_RTRIM, ar_CI_AI_RTRIM"),
("", "af, am, ar")
).foreach { case (collationName, proposals) =>
checkCollationNameError(collationName, proposals)
@@ -369,9 +452,9 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
1 << 15, // UTF8_BINARY mandatory zero bit 15 breach.
1 << 16, // UTF8_BINARY mandatory zero bit 16 breach.
1 << 17, // UTF8_BINARY mandatory zero bit 17 breach.
- 1 << 18, // UTF8_BINARY mandatory zero bit 18 breach.
1 << 19, // UTF8_BINARY mandatory zero bit 19 breach.
1 << 20, // UTF8_BINARY mandatory zero bit 20 breach.
+ 1 << 21, // UTF8_BINARY mandatory zero bit 21 breach.
1 << 23, // UTF8_BINARY mandatory zero bit 23 breach.
1 << 24, // UTF8_BINARY mandatory zero bit 24 breach.
1 << 25, // UTF8_BINARY mandatory zero bit 25 breach.
@@ -382,7 +465,6 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
(1 << 29) | (1 << 13), // ICU mandatory zero bit 13 breach.
(1 << 29) | (1 << 14), // ICU mandatory zero bit 14 breach.
(1 << 29) | (1 << 15), // ICU mandatory zero bit 15 breach.
- (1 << 29) | (1 << 18), // ICU mandatory zero bit 18 breach.
(1 << 29) | (1 << 19), // ICU mandatory zero bit 19 breach.
(1 << 29) | (1 << 20), // ICU mandatory zero bit 20 breach.
(1 << 29) | (1 << 21), // ICU mandatory zero bit 21 breach.
@@ -408,6 +490,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
("UNICODE_CI_CI", "UNICODE_CI"),
("UNICODE_CI_CS", "UNICODE_CS"),
("UNICODE_CS_CI", "UNICODE_CS"),
+ ("UNICODE_RTRIM_RTRIM", "UNICODE_RTRIM"),
("UNICODE_AS_AS", "UNICODE_AS"),
("UNICODE_AI_AI", "UNICODE_AI"),
("UNICODE_AS_AI", "UNICODE_AS"),
@@ -417,6 +500,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
("UNICODE_CS_AS_CI_AI", "UNICODE_CS_AS"),
("UNICODE__CS__AS", "UNICODE_AS"),
("UNICODE-CS-AS", "UNICODE"),
+ ("UNICODE__CS__RTRIM", "UNICODE_RTRIM"),
("UNICODECSAS", "UNICODE"),
("_CS_AS_UNICODE", "UNICODE")
).foreach { case (collationName, proposals) =>
@@ -457,7 +541,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig
val e = intercept[SparkException] {
fetchCollation(collationName)
}
- assert(e.getErrorClass === "COLLATION_INVALID_NAME")
+ assert(e.getCondition === "COLLATION_INVALID_NAME")
assert(e.getMessageParameters.asScala === Map(
"collationName" -> collationName, "proposals" -> proposals))
}
diff --git a/common/utils/src/main/java/org/apache/spark/SparkThrowable.java b/common/utils/src/main/java/org/apache/spark/SparkThrowable.java
index e1235b2982ba0..39808f58b08ae 100644
--- a/common/utils/src/main/java/org/apache/spark/SparkThrowable.java
+++ b/common/utils/src/main/java/org/apache/spark/SparkThrowable.java
@@ -35,19 +35,29 @@
*/
@Evolving
public interface SparkThrowable {
- // Succinct, human-readable, unique, and consistent representation of the error category
- // If null, error class is not set
- String getErrorClass();
+ /**
+ * Succinct, human-readable, unique, and consistent representation of the error condition.
+ * If null, error condition is not set.
+ */
+ String getCondition();
+
+ /**
+ * Succinct, human-readable, unique, and consistent representation of the error category.
+ * If null, error class is not set.
+ * @deprecated Use {@link #getCondition()} instead.
+ */
+ @Deprecated
+ default String getErrorClass() { return getCondition(); }
// Portable error identifier across SQL engines
// If null, error class or SQLSTATE is not set
default String getSqlState() {
- return SparkThrowableHelper.getSqlState(this.getErrorClass());
+ return SparkThrowableHelper.getSqlState(this.getCondition());
}
// True if this error is an internal error.
default boolean isInternalError() {
- return SparkThrowableHelper.isInternalError(this.getErrorClass());
+ return SparkThrowableHelper.isInternalError(this.getCondition());
}
default Map getMessageParameters() {
diff --git a/common/utils/src/main/scala/org/apache/spark/unsafe/array/ByteArrayUtils.java b/common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java
similarity index 100%
rename from common/utils/src/main/scala/org/apache/spark/unsafe/array/ByteArrayUtils.java
rename to common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java
diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json
index d8edc89ba83ea..77437f6c56179 100644
--- a/common/utils/src/main/resources/error/error-conditions.json
+++ b/common/utils/src/main/resources/error/error-conditions.json
@@ -1,4 +1,10 @@
{
+ "ADD_DEFAULT_UNSUPPORTED" : {
+ "message" : [
+ "Failed to execute command because DEFAULT values are not supported when adding new columns to previously existing target data source with table provider: \"\"."
+ ],
+ "sqlState" : "42623"
+ },
"AGGREGATE_FUNCTION_WITH_NONDETERMINISTIC_EXPRESSION" : {
"message" : [
"Non-deterministic expression should not appear in the arguments of an aggregate function."
@@ -121,10 +127,16 @@
},
"BINARY_ARITHMETIC_OVERFLOW" : {
"message" : [
- " caused overflow."
+ " caused overflow. Use to ignore overflow problem and return NULL."
],
"sqlState" : "22003"
},
+ "BOOLEAN_STATEMENT_WITH_EMPTY_ROW" : {
+ "message" : [
+ "Boolean statement is invalid. Expected single row with a value of the BOOLEAN type, but got an empty row."
+ ],
+ "sqlState" : "21000"
+ },
"CALL_ON_STREAMING_DATASET_UNSUPPORTED" : {
"message" : [
"The method can not be called on streaming Dataset/DataFrame."
@@ -256,6 +268,26 @@
"Error reading streaming state file of does not exist. If the stream job is restarted with a new or updated state operation, please create a new checkpoint location or clear the existing checkpoint location."
]
},
+ "HDFS_STORE_PROVIDER_OUT_OF_MEMORY" : {
+ "message" : [
+ "Could not load HDFS state store with id because of an out of memory exception."
+ ]
+ },
+ "INVALID_CHANGE_LOG_READER_VERSION" : {
+ "message" : [
+ "The change log reader version cannot be ."
+ ]
+ },
+ "INVALID_CHANGE_LOG_WRITER_VERSION" : {
+ "message" : [
+ "The change log writer version cannot be ."
+ ]
+ },
+ "ROCKSDB_STORE_PROVIDER_OUT_OF_MEMORY" : {
+ "message" : [
+ "Could not load RocksDB state store with id because of an out of memory exception."
+ ]
+ },
"SNAPSHOT_PARTITION_ID_NOT_FOUND" : {
"message" : [
"Partition id not found for state of operator at ."
@@ -344,6 +376,12 @@
],
"sqlState" : "429BB"
},
+ "CANNOT_REMOVE_RESERVED_PROPERTY" : {
+ "message" : [
+ "Cannot remove reserved property: ."
+ ],
+ "sqlState" : "42000"
+ },
"CANNOT_RENAME_ACROSS_SCHEMA" : {
"message" : [
"Renaming a across schemas is not allowed."
@@ -368,12 +406,6 @@
],
"sqlState" : "58030"
},
- "CANNOT_SAVE_VARIANT" : {
- "message" : [
- "Cannot save variant data type into external storage."
- ],
- "sqlState" : "0A000"
- },
"CANNOT_UPDATE_FIELD" : {
"message" : [
"Cannot update
field type:"
@@ -414,6 +446,12 @@
],
"sqlState" : "42846"
},
+ "CANNOT_USE_KRYO" : {
+ "message" : [
+ "Cannot load Kryo serialization codec. Kryo serialization cannot be used in the Spark Connect client. Use Java serialization, provide a custom Codec, or use Spark Classic instead."
+ ],
+ "sqlState" : "22KD3"
+ },
"CANNOT_WRITE_STATE_STORE" : {
"message" : [
"Error writing state store files for provider ."
@@ -429,13 +467,13 @@
},
"CAST_INVALID_INPUT" : {
"message" : [
- "The value of the type cannot be cast to because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead. If necessary set to \"false\" to bypass this error."
+ "The value of the type cannot be cast to because it is malformed. Correct the value as per the syntax, or change its target type. Use `try_cast` to tolerate malformed input and return NULL instead."
],
"sqlState" : "22018"
},
"CAST_OVERFLOW" : {
"message" : [
- "The value of the type cannot be cast to due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set to \"false\" to bypass this error."
+ "The value of the type cannot be cast to due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead."
],
"sqlState" : "22003"
},
@@ -459,6 +497,12 @@
],
"sqlState" : "56000"
},
+ "CIRCULAR_CLASS_REFERENCE" : {
+ "message" : [
+ "Cannot have circular references in class, but got the circular reference of class ."
+ ],
+ "sqlState" : "42602"
+ },
"CLASS_NOT_OVERRIDE_EXPECTED_METHOD" : {
"message" : [
" must override either or ."
@@ -471,6 +515,20 @@
],
"sqlState" : "0A000"
},
+ "CLUSTERING_COLUMNS_MISMATCH" : {
+ "message" : [
+ "Specified clustering does not match that of the existing table .",
+ "Specified clustering columns: [].",
+ "Existing clustering columns: []."
+ ],
+ "sqlState" : "42P10"
+ },
+ "CLUSTERING_NOT_SUPPORTED" : {
+ "message" : [
+ "'' does not support clustering."
+ ],
+ "sqlState" : "42000"
+ },
"CODEC_NOT_AVAILABLE" : {
"message" : [
"The codec is not available."
@@ -519,7 +577,7 @@
},
"IMPLICIT" : {
"message" : [
- "Error occurred due to the mismatch between multiple implicit non-default collations. Use COLLATE function to set the collation explicitly."
+ "Error occurred due to the mismatch between implicit collations: []. Use COLLATE function to set the collation explicitly."
]
}
},
@@ -560,6 +618,12 @@
],
"sqlState" : "42711"
},
+ "COLUMN_ARRAY_ELEMENT_TYPE_MISMATCH" : {
+ "message" : [
+ "Some values in field are incompatible with the column array type. Expected type ."
+ ],
+ "sqlState" : "0A000"
+ },
"COLUMN_NOT_DEFINED_IN_TABLE" : {
"message" : [
" column is not defined in table , defined table columns are: ."
@@ -572,6 +636,13 @@
],
"sqlState" : "42703"
},
+ "COLUMN_ORDINAL_OUT_OF_BOUNDS" : {
+ "message" : [
+ "Column ordinal out of bounds. The number of columns in the table is , but the column ordinal is .",
+ "Attributes are the following: ."
+ ],
+ "sqlState" : "22003"
+ },
"COMPARATOR_RETURNS_NULL" : {
"message" : [
"The comparator has returned a NULL for a comparison between and .",
@@ -585,6 +656,11 @@
"Cannot process input data types for the expression: ."
],
"subClass" : {
+ "BAD_INPUTS" : {
+ "message" : [
+ "The input data types to must be valid, but found the input types ."
+ ]
+ },
"MISMATCHED_TYPES" : {
"message" : [
"All input types must be the same except nullable, containsNull, valueContainsNull flags, but found the input types ."
@@ -611,6 +687,27 @@
],
"sqlState" : "40000"
},
+ "CONFLICTING_DIRECTORY_STRUCTURES" : {
+ "message" : [
+ "Conflicting directory structures detected.",
+ "Suspicious paths:",
+ "",
+ "If provided paths are partition directories, please set \"basePath\" in the options of the data source to specify the root directory of the table.",
+ "If there are multiple root directories, please load them separately and then union them."
+ ],
+ "sqlState" : "KD009"
+ },
+ "CONFLICTING_PARTITION_COLUMN_NAMES" : {
+ "message" : [
+ "Conflicting partition column names detected:",
+ "",
+ "For partitioned table directories, data files should only live in leaf directories.",
+ "And directories at the same level should have the same partition column name.",
+ "Please check the following directories for unexpected files or inconsistent partition column names:",
+ ""
+ ],
+ "sqlState" : "KD009"
+ },
"CONNECT" : {
"message" : [
"Generic Spark Connect error."
@@ -853,7 +950,7 @@
},
"NON_STRING_TYPE" : {
"message" : [
- "all arguments must be strings."
+ "all arguments of the function must be strings."
]
},
"NULL_TYPE" : {
@@ -868,7 +965,7 @@
},
"RANGE_FRAME_INVALID_TYPE" : {
"message" : [
- "The data type used in the order specification does not match the data type which is used in the range frame."
+ "The data type used in the order specification does not support the data type which is used in the range frame."
]
},
"RANGE_FRAME_MULTI_ORDER" : {
@@ -954,16 +1051,6 @@
"The input of can't be type data."
]
},
- "UNSUPPORTED_UDF_INPUT_TYPE" : {
- "message" : [
- "UDFs do not support '' as an input data type."
- ]
- },
- "UNSUPPORTED_UDF_OUTPUT_TYPE" : {
- "message" : [
- "UDFs do not support '' as an output data type."
- ]
- },
"VALUE_OUT_OF_RANGE" : {
"message" : [
"The must be between (current value = )."
@@ -994,6 +1081,12 @@
],
"sqlState" : "42710"
},
+ "DATA_SOURCE_EXTERNAL_ERROR" : {
+ "message" : [
+ "Encountered error when saving to external data source."
+ ],
+ "sqlState" : "KD010"
+ },
"DATA_SOURCE_NOT_EXIST" : {
"message" : [
"Data source '' not found. Please make sure the data source is registered."
@@ -1014,6 +1107,12 @@
],
"sqlState" : "42K03"
},
+ "DATETIME_FIELD_OUT_OF_BOUNDS" : {
+ "message" : [
+ ". If necessary set to \"false\" to bypass this error."
+ ],
+ "sqlState" : "22023"
+ },
"DATETIME_OVERFLOW" : {
"message" : [
"Datetime operation overflow: ."
@@ -1039,6 +1138,12 @@
],
"sqlState" : "42608"
},
+ "DEFAULT_UNSUPPORTED" : {
+ "message" : [
+ "Failed to execute command because DEFAULT values are not supported for target data source with table provider: \"\"."
+ ],
+ "sqlState" : "42623"
+ },
"DISTINCT_WINDOW_FUNCTION_UNSUPPORTED" : {
"message" : [
"Distinct window functions are not supported: ."
@@ -1051,6 +1156,12 @@
],
"sqlState" : "22012"
},
+ "DUPLICATED_CTE_NAMES" : {
+ "message" : [
+ "CTE definition can't have duplicate names: ."
+ ],
+ "sqlState" : "42602"
+ },
"DUPLICATED_FIELD_NAME_IN_ARROW_STRUCT" : {
"message" : [
"Duplicated field names in Arrow Struct are not allowed, got ."
@@ -1121,6 +1232,12 @@
],
"sqlState" : "42604"
},
+ "EMPTY_SCHEMA_NOT_SUPPORTED_FOR_DATASOURCE" : {
+ "message" : [
+ "The datasource does not support writing empty or nested empty schemas. Please make sure the data schema has at least one or more column(s)."
+ ],
+ "sqlState" : "0A000"
+ },
"ENCODER_NOT_FOUND" : {
"message" : [
"Not found an encoder of the type to Spark SQL internal representation.",
@@ -1387,6 +1504,12 @@
],
"sqlState" : "2203G"
},
+ "FAILED_TO_LOAD_ROUTINE" : {
+ "message" : [
+ "Failed to load routine ."
+ ],
+ "sqlState" : "38000"
+ },
"FAILED_TO_PARSE_TOO_COMPLEX" : {
"message" : [
"The statement, including potential SQL functions and referenced views, was too complex to parse.",
@@ -1412,6 +1535,12 @@
],
"sqlState" : "42704"
},
+ "FLATMAPGROUPSWITHSTATE_USER_FUNCTION_ERROR" : {
+ "message" : [
+ "An error occurred in the user provided function in flatMapGroupsWithState. Reason: "
+ ],
+ "sqlState" : "39000"
+ },
"FORBIDDEN_OPERATION" : {
"message" : [
"The operation is not allowed on the : ."
@@ -1424,6 +1553,12 @@
],
"sqlState" : "39000"
},
+ "FOREACH_USER_FUNCTION_ERROR" : {
+ "message" : [
+ "An error occurred in the user provided function in foreach sink. Reason: "
+ ],
+ "sqlState" : "39000"
+ },
"FOUND_MULTIPLE_DATA_SOURCES" : {
"message" : [
"Detected multiple data sources with the name ''. Please check the data source isn't simultaneously registered and located in the classpath."
@@ -1520,6 +1655,36 @@
],
"sqlState" : "42601"
},
+ "IDENTITY_COLUMNS_DUPLICATED_SEQUENCE_GENERATOR_OPTION" : {
+ "message" : [
+ "Duplicated IDENTITY column sequence generator option: ."
+ ],
+ "sqlState" : "42601"
+ },
+ "IDENTITY_COLUMNS_ILLEGAL_STEP" : {
+ "message" : [
+ "IDENTITY column step cannot be 0."
+ ],
+ "sqlState" : "42611"
+ },
+ "IDENTITY_COLUMNS_UNSUPPORTED_DATA_TYPE" : {
+ "message" : [
+ "DataType is not supported for IDENTITY columns."
+ ],
+ "sqlState" : "428H2"
+ },
+ "IDENTITY_COLUMN_WITH_DEFAULT_VALUE" : {
+ "message" : [
+ "A column cannot have both a default value and an identity column specification but column has default value: () and identity column specification: ()."
+ ],
+ "sqlState" : "42623"
+ },
+ "ILLEGAL_DAY_OF_WEEK" : {
+ "message" : [
+ "Illegal input for day of week: ."
+ ],
+ "sqlState" : "22009"
+ },
"ILLEGAL_STATE_STORE_VALUE" : {
"message" : [
"Illegal value provided to the State Store"
@@ -1859,8 +2024,20 @@
},
"INTERVAL_ARITHMETIC_OVERFLOW" : {
"message" : [
- "."
+ "Integer overflow while operating with intervals."
],
+ "subClass" : {
+ "WITHOUT_SUGGESTION" : {
+ "message" : [
+ "Try devising appropriate values for the interval parameters."
+ ]
+ },
+ "WITH_SUGGESTION" : {
+ "message" : [
+ "Use to tolerate overflow and return NULL instead."
+ ]
+ }
+ },
"sqlState" : "22015"
},
"INTERVAL_DIVIDED_BY_ZERO" : {
@@ -1897,6 +2074,12 @@
},
"sqlState" : "42903"
},
+ "INVALID_AGNOSTIC_ENCODER" : {
+ "message" : [
+ "Found an invalid agnostic encoder. Expects an instance of AgnosticEncoder but got . For more information consult '/api/java/index.html?org/apache/spark/sql/Encoder.html'."
+ ],
+ "sqlState" : "42001"
+ },
"INVALID_ARRAY_INDEX" : {
"message" : [
"The index is out of bounds. The array has elements. Use the SQL function `get()` to tolerate accessing element at invalid index and return NULL instead. If necessary set to \"false\" to bypass this error."
@@ -1909,12 +2092,24 @@
],
"sqlState" : "22003"
},
+ "INVALID_ATTRIBUTE_NAME_SYNTAX" : {
+ "message" : [
+ "Syntax error in the attribute name: . Check that backticks appear in pairs, a quoted string is a complete name part and use a backtick only inside quoted name parts."
+ ],
+ "sqlState" : "42601"
+ },
"INVALID_BITMAP_POSITION" : {
"message" : [
"The 0-indexed bitmap position is out of bounds. The bitmap has bits ( bytes)."
],
"sqlState" : "22003"
},
+ "INVALID_BOOLEAN_STATEMENT" : {
+ "message" : [
+ "Boolean statement is expected in the condition, but was found."
+ ],
+ "sqlState" : "22546"
+ },
"INVALID_BOUNDARY" : {
"message" : [
"The boundary is invalid: ."
@@ -1981,6 +2176,12 @@
},
"sqlState" : "22022"
},
+ "INVALID_CORRUPT_RECORD_TYPE" : {
+ "message" : [
+ "The column for corrupt records must have the nullable STRING type, but got ."
+ ],
+ "sqlState" : "42804"
+ },
"INVALID_CURSOR" : {
"message" : [
"The cursor is invalid."
@@ -2023,6 +2224,11 @@
"message" : [
"Too many letters in datetime pattern: . Please reduce pattern length."
]
+ },
+ "SECONDS_FRACTION" : {
+ "message" : [
+ "Cannot detect a seconds fraction pattern of variable length. Please make sure the pattern contains 'S', and does not contain illegal characters."
+ ]
}
},
"sqlState" : "22007"
@@ -2126,6 +2332,12 @@
],
"sqlState" : "42001"
},
+ "INVALID_EXTERNAL_TYPE" : {
+ "message" : [
+ "The external type is not valid for the type at the expression ."
+ ],
+ "sqlState" : "42K0N"
+ },
"INVALID_EXTRACT_BASE_FIELD_TYPE" : {
"message" : [
"Can't extract a value from . Need a complex type [STRUCT, ARRAY, MAP] but got ."
@@ -2215,7 +2427,8 @@
},
"INVALID_FRACTION_OF_SECOND" : {
"message" : [
- "The fraction of sec must be zero. Valid range is [0, 60]. If necessary set to \"false\" to bypass this error."
+ "Valid range for seconds is [0, 60] (inclusive), but the provided value is . To avoid this error, use `try_make_timestamp`, which returns NULL on error.",
+ "If you do not want to use the session default timestamp version of this function, use `try_make_timestamp_ntz` or `try_make_timestamp_ltz`."
],
"sqlState" : "22023"
},
@@ -2315,6 +2528,11 @@
"Uncaught arithmetic exception while parsing ''."
]
},
+ "DAY_TIME_PARSING" : {
+ "message" : [
+ "Error parsing interval day-time string: ."
+ ]
+ },
"INPUT_IS_EMPTY" : {
"message" : [
"Interval string cannot be empty."
@@ -2325,6 +2543,11 @@
"Interval string cannot be null."
]
},
+ "INTERVAL_PARSING" : {
+ "message" : [
+ "Error parsing interval string."
+ ]
+ },
"INVALID_FRACTION" : {
"message" : [
" cannot have fractional part."
@@ -2360,19 +2583,50 @@
"Expect a unit name after but hit EOL."
]
},
+ "SECOND_NANO_FORMAT" : {
+ "message" : [
+ "Interval string does not match second-nano format of ss.nnnnnnnnn."
+ ]
+ },
+ "TIMEZONE_INTERVAL_OUT_OF_RANGE" : {
+ "message" : [
+ "The interval value must be in the range of [-18, +18] hours with second precision."
+ ]
+ },
"UNKNOWN_PARSING_ERROR" : {
"message" : [
"Unknown error when parsing ."
]
},
+ "UNMATCHED_FORMAT_STRING" : {
+ "message" : [
+ "Interval string does not match format of when cast to : ."
+ ]
+ },
+ "UNMATCHED_FORMAT_STRING_WITH_NOTICE" : {
+ "message" : [
+ "Interval string does not match format of when cast to : . Set \"spark.sql.legacy.fromDayTimeString.enabled\" to \"true\" to restore the behavior before Spark 3.0."
+ ]
+ },
"UNRECOGNIZED_NUMBER" : {
"message" : [
"Unrecognized number ."
]
+ },
+ "UNSUPPORTED_FROM_TO_EXPRESSION" : {
+ "message" : [
+ "Cannot support (interval '' to ) expression."
+ ]
}
},
"sqlState" : "22006"
},
+ "INVALID_INTERVAL_WITH_MICROSECONDS_ADDITION" : {
+ "message" : [
+ "Cannot add an interval to a date because its microseconds part is not 0. If necessary set to \"false\" to bypass this error."
+ ],
+ "sqlState" : "22006"
+ },
"INVALID_INVERSE_DISTRIBUTION_FUNCTION" : {
"message" : [
"Invalid inverse distribution function ."
@@ -2396,6 +2650,13 @@
},
"sqlState" : "42K0K"
},
+ "INVALID_JAVA_IDENTIFIER_AS_FIELD_NAME" : {
+ "message" : [
+ " is not a valid Java identifier and cannot be used as a field name",
+ "."
+ ],
+ "sqlState" : "46121"
+ },
"INVALID_JOIN_TYPE_FOR_JOINWITH" : {
"message" : [
"Invalid join type in joinWith: ."
@@ -2414,6 +2675,12 @@
],
"sqlState" : "2203G"
},
+ "INVALID_JSON_RECORD_TYPE" : {
+ "message" : [
+ "Detected an invalid type of a JSON record while inferring a common schema in the mode . Expected a STRUCT type, but found ."
+ ],
+ "sqlState" : "22023"
+ },
"INVALID_JSON_ROOT_FIELD" : {
"message" : [
"Cannot convert JSON root field to target Spark type."
@@ -2432,6 +2699,24 @@
],
"sqlState" : "F0000"
},
+ "INVALID_LABEL_USAGE" : {
+ "message" : [
+ "The usage of the label is invalid."
+ ],
+ "subClass" : {
+ "DOES_NOT_EXIST" : {
+ "message" : [
+ "Label was used in the statement, but the label does not belong to any surrounding block."
+ ]
+ },
+ "ITERATE_IN_COMPOUND" : {
+ "message" : [
+ "ITERATE statement cannot be used with a label that belongs to a compound (BEGIN...END) body."
+ ]
+ }
+ },
+ "sqlState" : "42K0L"
+ },
"INVALID_LAMBDA_FUNCTION_CALL" : {
"message" : [
"Invalid lambda function call."
@@ -2598,6 +2883,11 @@
"expects an integer value in [0, ), but got ."
]
},
+ "BOOLEAN" : {
+ "message" : [
+ "expects a boolean literal, but got ."
+ ]
+ },
"CHARSET" : {
"message" : [
"expects one of the , but got ."
@@ -2608,11 +2898,31 @@
"expects one of the units without quotes YEAR, QUARTER, MONTH, WEEK, DAY, DAYOFYEAR, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND, but got the string literal ."
]
},
+ "DOUBLE" : {
+ "message" : [
+ "expects a double literal, but got ."
+ ]
+ },
+ "DTYPE" : {
+ "message" : [
+ "Unsupported dtype: . Valid values: float64, float32."
+ ]
+ },
+ "INTEGER" : {
+ "message" : [
+ "expects an integer literal, but got ."
+ ]
+ },
"LENGTH" : {
"message" : [
"Expects `length` greater than or equal to 0, but got ."
]
},
+ "LONG" : {
+ "message" : [
+ "expects a long literal, but got ."
+ ]
+ },
"NULL" : {
"message" : [
"expects a non-NULL value."
@@ -2633,6 +2943,11 @@
"Expects a positive or a negative value for `start`, but got 0."
]
},
+ "STRING" : {
+ "message" : [
+ "expects a string literal, but got ."
+ ]
+ },
"ZERO_INDEX" : {
"message" : [
"expects %1$, %2$ and so on, but got %0$."
@@ -2665,6 +2980,12 @@
},
"sqlState" : "42601"
},
+ "INVALID_PARTITION_VALUE" : {
+ "message" : [
+ "Failed to cast value to data type for partition column . Ensure the value matches the expected data type for this partition column."
+ ],
+ "sqlState" : "42846"
+ },
"INVALID_PROPERTY_KEY" : {
"message" : [
" is an invalid property key, please use quotes, e.g. SET =."
@@ -2683,6 +3004,18 @@
],
"sqlState" : "42613"
},
+ "INVALID_REGEXP_REPLACE" : {
+ "message" : [
+ "Could not perform regexp_replace for source = \"